libdap Updated for version 3.18.1
HTTPConnect.cc
1
2// -*- mode: c++; c-basic-offset:4 -*-
3
4// This file is part of libdap, A C++ implementation of the OPeNDAP Data
5// Access Protocol.
6
7// Copyright (c) 2002,2003 OPeNDAP, Inc.
8// Author: James Gallagher <jgallagher@opendap.org>
9//
10// This library is free software; you can redistribute it and/or
11// modify it under the terms of the GNU Lesser General Public
12// License as published by the Free Software Foundation; either
13// version 2.1 of the License, or (at your option) any later version.
14//
15// This library is distributed in the hope that it will be useful,
16// but WITHOUT ANY WARRANTY; without even the implied warranty of
17// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
18// Lesser General Public License for more details.
19//
20// You should have received a copy of the GNU Lesser General Public
21// License along with this library; if not, write to the Free Software
22// Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
23//
24// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
25
26
27#include "config.h"
28
29#ifdef HAVE_UNISTD_H
30#include <unistd.h>
31#endif
32
33#include <sys/stat.h>
34
35#ifdef WIN32
36#include <io.h>
37#endif
38
39#include <string>
40#include <vector>
41#include <functional>
42#include <algorithm>
43#include <sstream>
44#include <fstream>
45#include <iterator>
46#include <cstdlib>
47#include <cstring>
48#include <cerrno>
49
50//#define DODS_DEBUG2
51//#define HTTP_TRACE
52//#define DODS_DEBUG
53
54#undef USE_GETENV
55
56
57#include "debug.h"
58#include "mime_util.h"
59#include "media_types.h"
60#include "GNURegex.h"
61#include "HTTPCache.h"
62#include "HTTPConnect.h"
63#include "RCReader.h"
64#include "HTTPResponse.h"
65#include "HTTPCacheResponse.h"
66
67using namespace std;
68
69namespace libdap {
70
71// These global variables are not MT-Safe, but I'm leaving them as is because
72// they are used only for debugging (set them in a debugger like gdb or ddd).
73// They are not static because I think that many debuggers cannot access
74// static variables. 08/07/02 jhrg
75
76// Set this to 1 to turn on libcurl's verbose mode (for debugging).
77int www_trace = 0;
78
79// Keep the temporary files; useful for debugging.
80int dods_keep_temps = 0;
81
// Inclusive range of 4xx status codes that have an entry in
// http_client_errors; the table is indexed by (status - CLIENT_ERR_MIN).
#define CLIENT_ERR_MIN 400
#define CLIENT_ERR_MAX 417
static const char *http_client_errors[CLIENT_ERR_MAX - CLIENT_ERR_MIN + 1] =
    {
        "Bad Request:",                                     // 400
        "Unauthorized: Contact the server administrator.",  // 401
        "Payment Required.",                                // 402
        "Forbidden: Contact the server administrator.",     // 403
        "Not Found: The data source or server could not be found.\n"
        " Often this means that the OPeNDAP server is missing or needs attention;\n"
        " Please contact the server administrator.",        // 404
        "Method Not Allowed.",                              // 405
        "Not Acceptable.",                                  // 406
        "Proxy Authentication Required.",                   // 407
        "Request Time-out.",                                // 408
        "Conflict.",                                        // 409
        "Gone:.",                                           // 410
        "Length Required.",                                 // 411
        "Precondition Failed.",                             // 412
        "Request Entity Too Large.",                        // 413
        "Request URI Too Large.",                           // 414
        "Unsupported Media Type.",                          // 415
        "Requested Range Not Satisfiable.",                 // 416
        "Expectation Failed."                               // 417
    };

// Inclusive range of 5xx status codes that have an entry in
// http_server_errors; the table is indexed by (status - SERVER_ERR_MIN).
#define SERVER_ERR_MIN 500
#define SERVER_ERR_MAX 505
static const char *http_server_errors[SERVER_ERR_MAX - SERVER_ERR_MIN + 1] =
    {
        "Internal Server Error.",       // 500
        "Not Implemented.",             // 501
        "Bad Gateway.",                 // 502
        "Service Unavailable.",         // 503
        "Gateway Time-out.",            // 504
        "HTTP Version Not Supported."   // 505
    };

/** Translate an HTTP status code into a human-readable message.

    @param status The HTTP status code.
    @return The table entry for codes in [400, 417] and [500, 505]; for any
    other value, a generic "Unknown Error" message. */
static std::string
http_status_to_string(int status)
{
    const char *text =
        "Unknown Error: This indicates a problem with libdap++.\nPlease report this to support@opendap.org.";

    if (CLIENT_ERR_MIN <= status && status <= CLIENT_ERR_MAX)
        text = http_client_errors[status - CLIENT_ERR_MIN];
    else if (SERVER_ERR_MIN <= status && status <= SERVER_ERR_MAX)
        text = http_server_errors[status - SERVER_ERR_MIN];

    return std::string(text);
}
132
133static ObjectType
134determine_object_type(const string &header_value)
135{
136 // DAP4 Data: application/vnd.opendap.dap4.data
137 // DAP4 DMR: application/vnd.opendap.dap4.dataset-metadata+xml
138
139 string::size_type plus = header_value.find('+');
140 string base_type;
141 string type_extension = "";
142 if (plus != string::npos) {
143 base_type= header_value.substr(0, plus);
144 type_extension = header_value.substr(plus+1);
145 }
146 else
147 base_type = header_value;
148
149 if (base_type == DMR_Content_Type
150 || (base_type.find("application/") != string::npos
151 && base_type.find("dap4.dataset-metadata") != string::npos)) {
152 if (type_extension == "xml")
153 return dap4_dmr;
154 else
155 return unknown_type;
156 }
157 else if (base_type == DAP4_DATA_Content_Type
158 || (base_type.find("application/") != string::npos
159 && base_type.find("dap4.data") != string::npos)) {
160 return dap4_data;
161 }
162 else if (header_value.find("text/html") != string::npos) {
163 return web_error;
164 }
165 else
166 return unknown_type;
167}
168
173class ParseHeader : public unary_function<const string &, void>
174{
175 ObjectType type; // What type of object is in the stream?
176 string server; // Server's version string.
177 string protocol; // Server's protocol version.
178 string location; // Url returned by server
179
180public:
181 ParseHeader() : type(unknown_type), server("dods/0.0"), protocol("2.0")
182 { }
183
184 void operator()(const string &line)
185 {
186 string name, value;
187 parse_mime_header(line, name, value);
188
189 DBG2(cerr << name << ": " << value << endl);
190
191 // Content-Type is used to determine the content of DAP4 responses, but allow the
192 // Content-Description header to override CT o preserve operation with DAP2 servers.
193 // jhrg 11/12/13
194 if (type == unknown_type && name == "content-type") {
195 type = determine_object_type(value); // see above
196 }
197 if (name == "content-description" && !(type == dap4_dmr || type == dap4_data || type == dap4_error)) {
198 type = get_description_type(value); // defined in mime_util.cc
199 }
200 // The second test (== "dods/0.0") tests if xopendap-server has already
201 // been seen. If so, use that header in preference to the old
202 // XDODS-Server header. jhrg 2/7/06
203 else if (name == "xdods-server" && server == "dods/0.0") {
204 server = value;
205 }
206 else if (name == "xopendap-server") {
207 server = value;
208 }
209 else if (name == "xdap") {
210 protocol = value;
211 }
212 else if (server == "dods/0.0" && name == "server") {
213 server = value;
214 }
215 else if (name == "location") {
216 location = value;
217 }
218 }
219
220 ObjectType get_object_type()
221 {
222 return type;
223 }
224
225 string get_server()
226 {
227 return server;
228 }
229
230 string get_protocol()
231 {
232 return protocol;
233 }
234
235 string get_location() {
236 return location;
237 }
238};
239
/** libcurl header callback: store one raw response header line.

    The line terminator ("\n" or "\r\n") is removed only when it is actually
    present. Empty lines and lines that contain "HTTP" (status lines) are not
    stored.

    @param ptr Start of the header data; not NUL terminated.
    @param size Size of one data element.
    @param nmemb Number of elements; the data is size * nmemb bytes long.
    @param resp_hdrs A vector<string>* that receives the header line. If
    null, the line is discarded.
    @return size * nmemb, i.e. all of the data is always reported as
    consumed. */
static size_t
save_raw_http_headers(void *ptr, size_t size, size_t nmemb, void *resp_hdrs)
{
    DBG2(std::cerr << "Inside the header parser." << std::endl);
    std::vector<std::string> *hdrs = static_cast<std::vector<std::string> *>(resp_hdrs);

    const char *data = static_cast<const char *>(ptr);
    const size_t total = size * nmemb;

    // Nothing to look at; this also avoids the unsigned wrap-around that
    // 'nmemb - 1' produced for a zero-length call.
    if (!data || total == 0)
        return total;

    // Grab the header, minus the trailing newline. Or \r\n pair.
    size_t length = total;
    if (length > 0 && data[length - 1] == '\n')
        --length;
    if (length > 0 && data[length - 1] == '\r')
        --length;

    const std::string complete_line(data, length);

    // Store all non-empty headers that are not HTTP status codes
    if (hdrs && !complete_line.empty() && complete_line.find("HTTP") == std::string::npos) {
        DBG(std::cerr << "Header line: " << complete_line << std::endl);
        hdrs->push_back(complete_line);
    }

    return total;
}
276
278static int
279curl_debug(CURL *, curl_infotype info, char *msg, size_t size, void *)
280{
281 string message(msg, size);
282
283 switch (info) {
284 case CURLINFO_TEXT:
285 cerr << "Text: " << message; break;
286 case CURLINFO_HEADER_IN:
287 cerr << "Header in: " << message; break;
288 case CURLINFO_HEADER_OUT:
289 cerr << "Header out: " << message; break;
290 case CURLINFO_DATA_IN:
291 cerr << "Data in: " << message; break;
292 case CURLINFO_DATA_OUT:
293 cerr << "Data out: " << message; break;
294 case CURLINFO_END:
295 cerr << "End: " << message; break;
296#ifdef CURLINFO_SSL_DATA_IN
297 case CURLINFO_SSL_DATA_IN:
298 cerr << "SSL Data in: " << message; break;
299#endif
300#ifdef CURLINFO_SSL_DATA_OUT
301 case CURLINFO_SSL_DATA_OUT:
302 cerr << "SSL Data out: " << message; break;
303#endif
304 default:
305 cerr << "Curl info: " << message; break;
306 }
307 return 0;
308}
309
313void
314HTTPConnect::www_lib_init()
315{
316 d_curl = curl_easy_init();
317 if (!d_curl)
318 throw InternalErr(__FILE__, __LINE__, "Could not initialize libcurl.");
319
320 // Now set options that will remain constant for the duration of this
321 // CURL object.
322
323 // Set the proxy host.
324 if (!d_rcr->get_proxy_server_host().empty()) {
325 DBG(cerr << "Setting up a proxy server." << endl);
326 DBG(cerr << "Proxy host: " << d_rcr->get_proxy_server_host()
327 << endl);
328 DBG(cerr << "Proxy port: " << d_rcr->get_proxy_server_port()
329 << endl);
330 DBG(cerr << "Proxy pwd : " << d_rcr->get_proxy_server_userpw()
331 << endl);
332 curl_easy_setopt(d_curl, CURLOPT_PROXY,
333 d_rcr->get_proxy_server_host().c_str());
334 curl_easy_setopt(d_curl, CURLOPT_PROXYPORT,
335 d_rcr->get_proxy_server_port());
336
337 // As of 4/21/08 only NTLM, Digest and Basic work.
338#ifdef CURLOPT_PROXYAUTH
339 curl_easy_setopt(d_curl, CURLOPT_PROXYAUTH, (long)CURLAUTH_ANY);
340#endif
341
342 // Password might not be required. 06/21/04 jhrg
343 if (!d_rcr->get_proxy_server_userpw().empty())
344 curl_easy_setopt(d_curl, CURLOPT_PROXYUSERPWD,
345 d_rcr->get_proxy_server_userpw().c_str());
346 }
347
348 curl_easy_setopt(d_curl, CURLOPT_ERRORBUFFER, d_error_buffer);
349 // We have to set FailOnError to false for any of the non-Basic
350 // authentication schemes to work. 07/28/03 jhrg
351 curl_easy_setopt(d_curl, CURLOPT_FAILONERROR, 0);
352
353 // This means libcurl will use Basic, Digest, GSS Negotiate, or NTLM,
354 // choosing the the 'safest' one supported by the server.
355 // This requires curl 7.10.6 which is still in pre-release. 07/25/03 jhrg
356 curl_easy_setopt(d_curl, CURLOPT_HTTPAUTH, (long)CURLAUTH_ANY);
357
358 curl_easy_setopt(d_curl, CURLOPT_NOPROGRESS, 1);
359 curl_easy_setopt(d_curl, CURLOPT_NOSIGNAL, 1);
360 curl_easy_setopt(d_curl, CURLOPT_HEADERFUNCTION, save_raw_http_headers);
361 // In read_url a call to CURLOPT_WRITEHEADER is used to set the fourth
362 // param of save_raw_http_headers to a vector<string> object.
363
364 // Follow 302 (redirect) responses
365 curl_easy_setopt(d_curl, CURLOPT_FOLLOWLOCATION, 1);
366 curl_easy_setopt(d_curl, CURLOPT_MAXREDIRS, 5);
367
368 // If the user turns off SSL validation...
369 if (d_rcr->get_validate_ssl() == 0) {
370 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYPEER, 0);
371 curl_easy_setopt(d_curl, CURLOPT_SSL_VERIFYHOST, 0);
372 }
373
374 // Look to see if cookies are turned on in the .dodsrc file. If so,
375 // activate here. We honor 'session cookies' (cookies without an
376 // expiration date) here so that session-based SSO systems will work as
377 // expected.
378 if (!d_cookie_jar.empty()) {
379 DBG(cerr << "Setting the cookie jar to: " << d_cookie_jar << endl);
380 curl_easy_setopt(d_curl, CURLOPT_COOKIEJAR, d_cookie_jar.c_str());
381 curl_easy_setopt(d_curl, CURLOPT_COOKIESESSION, 1);
382 }
383
384 if (www_trace) {
385 cerr << "Curl version: " << curl_version() << endl;
386 curl_easy_setopt(d_curl, CURLOPT_VERBOSE, 1);
387 curl_easy_setopt(d_curl, CURLOPT_DEBUGFUNCTION, curl_debug);
388 }
389}
390
394class BuildHeaders : public unary_function<const string &, void>
395{
396 struct curl_slist *d_cl;
397
398public:
399 BuildHeaders() : d_cl(0)
400 {}
401
402 void operator()(const string &header)
403 {
404 DBG(cerr << "Adding '" << header.c_str() << "' to the header list."
405 << endl);
406 d_cl = curl_slist_append(d_cl, header.c_str());
407 }
408
409 struct curl_slist *get_headers()
410 {
411 return d_cl;
412 }
413};
414
429long
430HTTPConnect::read_url(const string &url, FILE *stream, vector<string> *resp_hdrs, const vector<string> *headers)
431{
432 curl_easy_setopt(d_curl, CURLOPT_URL, url.c_str());
433
434#ifdef WIN32
435 // See the curl documentation for CURLOPT_FILE (aka CURLOPT_WRITEDATA)
436 // and the CURLOPT_WRITEFUNCTION option. Quote: "If you are using libcurl as
437 // a win32 DLL, you MUST use the CURLOPT_WRITEFUNCTION option if you set the
438 // CURLOPT_WRITEDATA option or you will experience crashes". At the root of
439 // this issue is that one should not pass a FILE * to a windows DLL. Close
440 // inspection of libcurl yields that their default write function when using
441 // the CURLOPT_WRITEDATA is just "fwrite".
442 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
443 curl_easy_setopt(d_curl, CURLOPT_WRITEFUNCTION, &fwrite);
444#else
445 curl_easy_setopt(d_curl, CURLOPT_WRITEDATA, stream);
446#endif
447
448 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
449 ostream_iterator<string>(cerr, "\n")));
450
451 BuildHeaders req_hdrs;
452 req_hdrs = for_each(d_request_headers.begin(), d_request_headers.end(),
453 req_hdrs);
454 if (headers)
455 req_hdrs = for_each(headers->begin(), headers->end(), req_hdrs);
456
457 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, req_hdrs.get_headers());
458
459 // Turn off the proxy for this URL?
460 bool temporary_proxy = false;
461 if ((temporary_proxy = url_uses_no_proxy_for(url))) {
462 DBG(cerr << "Suppress proxy for url: " << url << endl);
463 curl_easy_setopt(d_curl, CURLOPT_PROXY, 0);
464 }
465
466 string::size_type at_sign = url.find('@');
467 // Assume username:password present *and* assume it's an HTTP URL; it *is*
468 // HTTPConnect, after all. 7 is position after "http://"; the second arg
469 // to substr() is the sub string length.
470 if (at_sign != url.npos)
471 d_upstring = url.substr(7, at_sign - 7);
472
473 if (!d_upstring.empty())
474 curl_easy_setopt(d_curl, CURLOPT_USERPWD, d_upstring.c_str());
475
476 // Pass save_raw_http_headers() a pointer to the vector<string> where the
477 // response headers may be stored. Callers can use the resp_hdrs
478 // value/result parameter to get the raw response header information .
479 curl_easy_setopt(d_curl, CURLOPT_WRITEHEADER, resp_hdrs);
480
481 // This is the call that causes curl to go and get the remote resource and "write it down"
482 // utilizing the configuration state that has been previously conditioned by various perturbations
483 // of calls to curl_easy_setopt().
484 CURLcode res = curl_easy_perform(d_curl);
485
486 // Free the header list and null the value in d_curl.
487 curl_slist_free_all(req_hdrs.get_headers());
488 curl_easy_setopt(d_curl, CURLOPT_HTTPHEADER, 0);
489
490 // Reset the proxy?
491 if (temporary_proxy && !d_rcr->get_proxy_server_host().empty())
492 curl_easy_setopt(d_curl, CURLOPT_PROXY,
493 d_rcr->get_proxy_server_host().c_str());
494
495 if (res != 0)
496 throw Error(d_error_buffer);
497
498 long status;
499 res = curl_easy_getinfo(d_curl, CURLINFO_HTTP_CODE, &status);
500 if (res != 0)
501 throw Error(d_error_buffer);
502
503 char *ct_ptr = 0;
504 res = curl_easy_getinfo(d_curl, CURLINFO_CONTENT_TYPE, &ct_ptr);
505 if (res == CURLE_OK && ct_ptr)
506 d_content_type = ct_ptr;
507 else
508 d_content_type = "";
509
510 return status;
511}
512
516bool
517HTTPConnect::url_uses_proxy_for(const string &url)
518{
519 if (d_rcr->is_proxy_for_used()) {
520 Regex host_regex(d_rcr->get_proxy_for_regexp().c_str());
521 int index = 0, matchlen;
522 return host_regex.search(url.c_str(), url.size(), matchlen, index) != -1;
523 }
524
525 return false;
526}
527
531bool
532HTTPConnect::url_uses_no_proxy_for(const string &url) throw()
533{
534 return d_rcr->is_no_proxy_for_used()
535 && url.find(d_rcr->get_no_proxy_for_host()) != string::npos;
536}
537
538// Public methods. Mostly...
539
546HTTPConnect::HTTPConnect(RCReader *rcr, bool use_cpp) : d_username(""), d_password(""), d_cookie_jar(""),
547 d_dap_client_protocol_major(2), d_dap_client_protocol_minor(0), d_use_cpp_streams(use_cpp)
548
549{
550 d_accept_deflate = rcr->get_deflate();
551 d_rcr = rcr;
552
553 // Load in the default headers to send with a request. The empty Pragma
554 // headers overrides libcurl's default Pragma: no-cache header (which
555 // will disable caching by Squid, et c.). The User-Agent header helps
556 // make server logs more readable. 05/05/03 jhrg
557 d_request_headers.push_back(string("Pragma:"));
558 string user_agent = string("User-Agent: ") + string(CNAME)
559 + string("/") + string(CVER);
560 d_request_headers.push_back(user_agent);
561 if (d_accept_deflate)
562 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
563
564 // HTTPCache::instance returns a valid ptr or 0.
565 if (d_rcr->get_use_cache())
566 d_http_cache = HTTPCache::instance(d_rcr->get_dods_cache_root(),true);
567 else
568 d_http_cache = 0;
569
570 DBG2(cerr << "Cache object created (" << hex << d_http_cache << dec
571 << ")" << endl);
572
573 if (d_http_cache) {
574 d_http_cache->set_cache_enabled(d_rcr->get_use_cache());
575 d_http_cache->set_expire_ignored(d_rcr->get_ignore_expires() != 0);
576 d_http_cache->set_max_size(d_rcr->get_max_cache_size());
577 d_http_cache->set_max_entry_size(d_rcr->get_max_cached_obj());
578 d_http_cache->set_default_expiration(d_rcr->get_default_expires());
579 d_http_cache->set_always_validate(d_rcr->get_always_validate() != 0);
580 }
581
582 d_cookie_jar = rcr->get_cookie_jar();
583
584 www_lib_init(); // This may throw either Error or InternalErr
585}
586
587HTTPConnect::~HTTPConnect()
588{
589 DBG2(cerr << "Entering the HTTPConnect dtor" << endl);
590
591 curl_easy_cleanup(d_curl);
592
593 DBG2(cerr << "Leaving the HTTPConnect dtor" << endl);
594}
595
/** Predicate that is true for strings that begin with a given header name.

    The header text is stored by value. Holding a reference to the
    constructor argument would leave the predicate with a dangling reference
    whenever it is built from a temporary and then kept in a variable.

    The class no longer derives from std::unary_function (deprecated in
    C++11 and removed in C++17); the two typedefs that base used to supply
    are declared directly. */
class HeaderMatch {
    std::string d_header;
    public:
        typedef const std::string &argument_type;
        typedef bool result_type;

        HeaderMatch(const std::string &header) : d_header(header) {}

        /** @return True if arg starts with the header text. */
        bool operator()(const std::string &arg) const
        {
            // Look only at the leading characters instead of searching the
            // whole string for a match and then testing its position.
            return arg.compare(0, d_header.size(), d_header) == 0;
        }
};
603
616HTTPResponse *
617HTTPConnect::fetch_url(const string &url)
618{
619#ifdef HTTP_TRACE
620 cout << "GET " << url << " HTTP/1.0" << endl;
621#endif
622
623 HTTPResponse *stream;
624
625 if (/*d_http_cache && d_http_cache->*/is_cache_enabled()) {
626 stream = caching_fetch_url(url);
627 }
628 else {
629 stream = plain_fetch_url(url);
630 }
631
632#ifdef HTTP_TRACE
633 stringstream ss;
634 ss << "HTTP/1.0 " << stream->get_status() << " -" << endl;
635 for (size_t i = 0; i < stream->get_headers()->size(); i++) {
636 ss << stream->get_headers()->at(i) << endl;
637 }
638 cout << ss.str();
639#endif
640
641 ParseHeader parser;
642
643 // An apparent quirk of libcurl is that it does not pass the Content-type
644 // header to the callback used to save them, but check and add it from the
645 // saved state variable only if it's not there (without this a test failed
646 // in HTTPCacheTest). jhrg 11/12/13
647 if (!d_content_type.empty() && find_if(stream->get_headers()->begin(), stream->get_headers()->end(),
648 HeaderMatch("Content-Type:")) == stream->get_headers()->end())
649 stream->get_headers()->push_back("Content-Type: " + d_content_type);
650
651 parser = for_each(stream->get_headers()->begin(), stream->get_headers()->end(), ParseHeader());
652
653#ifdef HTTP_TRACE
654 cout << endl << endl;
655#endif
656
657 // handle redirection case (2007-04-27, gaffigan@sfos.uaf.edu)
658 if (parser.get_location() != "" &&
659 url.substr(0,url.find("?",0)).compare(parser.get_location().substr(0,url.find("?",0))) != 0) {
660 delete stream;
661 return fetch_url(parser.get_location());
662 }
663
664 stream->set_type(parser.get_object_type()); // uses the value of content-description
665
666 stream->set_version(parser.get_server());
667 stream->set_protocol(parser.get_protocol());
668
669 if (d_use_cpp_streams) {
670 stream->transform_to_cpp();
671 }
672
673 return stream;
674}
675
676// Look around for a reasonable place to put a temporary file. Check first
677// the value of the TMPDIR env var. If that does not yeild a path that's
678// writable (as defined by access(..., W_OK|R_OK)) then look at P_tmpdir (as
679// defined in stdio.h. If both come up empty, then use `./'.
680
681// Change this to a version that either returns a string or an open file
682// descriptor. Use information from https://buildsecurityin.us-cert.gov/
683// (see open()) to make it more secure. Ideal solution: get deserialize()
684// methods to read from a stream returned by libcurl, not from a temporary
685// file. 9/21/07 jhrg Updated to use strings, other misc changes. 3/22/11
686static string
687get_tempfile_template(const string &file_template)
688{
689 string c;
690
691 // Windows has one idea of the standard name(s) for a temporary files dir
692#ifdef WIN32
693 // white list for a WIN32 directory
694 Regex directory("[-a-zA-Z0-9_:\\]*");
695
696 // If we're OK to use getenv(), try it.
697#ifdef USE_GETENV
698 c = getenv("TEMP");
699 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
700 goto valid_temp_directory;
701
702 c= getenv("TMP");
703 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
704 goto valid_temp_directory;
705#endif // USE_GETENV
706
707 // The windows default
708 c = "c:\tmp";
709 if (c && directory.match(c.c_str(), c.length()) && (access(c.c_str(), 6) == 0))
710 goto valid_temp_directory;
711
712#else // Unix/Linux/OSX has another...
713 // white list for a directory
714 Regex directory("[-a-zA-Z0-9_/]*");
715#ifdef USE_GETENV
716 c = getenv("TMPDIR");
717 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
718 goto valid_temp_directory;
719#endif // USE_GETENV
720
721 // Unix defines this sometimes - if present, use it.
722#ifdef P_tmpdir
723 if (access(P_tmpdir, W_OK | R_OK) == 0) {
724 c = P_tmpdir;
725 goto valid_temp_directory;
726 }
727#endif
728
729 // The Unix default
730 c = "/tmp";
731 if (directory.match(c.c_str(), c.length()) && (access(c.c_str(), W_OK | R_OK) == 0))
732 goto valid_temp_directory;
733
734#endif // WIN32
735
736 // If we found nothing useful, use the current directory
737 c = ".";
738
739valid_temp_directory:
740
741#ifdef WIN32
742 c += "\\" + file_template;
743#else
744 c += "/" + file_template;
745#endif
746
747 return c;
748}
749
768string
769get_temp_file(FILE *&stream) throw(Error)
770{
771 string dods_temp = get_tempfile_template((string)"dodsXXXXXX");
772
773 vector<char> pathname(dods_temp.length() + 1);
774
775 strncpy(&pathname[0], dods_temp.c_str(), dods_temp.length());
776
777 DBG(cerr << "pathanme: " << &pathname[0] << " (" << dods_temp.length() + 1 << ")" << endl);
778
779 // Open truncated for update. NB: mkstemp() returns a file descriptor.
780#if defined(WIN32) || defined(TEST_WIN32_TEMPS)
781 stream = fopen(_mktemp(&pathname[0]), "w+b");
782#else
783 // Make sure that temp files are accessible only by the owner.
784 int mask = umask(077);
785 if (mask < 0)
786 throw Error("Could not set the file creation mask: " + string(strerror(errno)));
787 int fd = mkstemp(&pathname[0]);
788 if (fd < 0)
789 throw Error("Could not create a temporary file to store the response: " + string(strerror(errno)));
790
791 stream = fdopen(fd, "w+");
792 umask(mask);
793#endif
794
795 if (!stream)
796 throw Error("Failed to open a temporary file for the data values (" + dods_temp + ")");
797
798 dods_temp = &pathname[0];
799 return dods_temp;
800}
801
802
808void
809close_temp(FILE *s, const string &name)
810{
811 int res = fclose(s);
812 if (res)
813 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
814
815 res = unlink(name.c_str());
816 if (res != 0)
817 throw InternalErr(__FILE__, __LINE__, "!FAIL! " + long_to_string(res));
818}
819
841HTTPResponse *
842HTTPConnect::caching_fetch_url(const string &url)
843{
844 DBG(cerr << "Is this URL (" << url << ") in the cache?... ");
845
846 vector<string> *headers = new vector<string>;
847 string file_name;
848 FILE *s = d_http_cache->get_cached_response(url, *headers, file_name);
849 if (!s) {
850 // url not in cache; get it and cache it
851 DBGN(cerr << "no; getting response and caching." << endl);
852 delete headers; headers = 0;
853 time_t now = time(0);
854 HTTPResponse *rs = plain_fetch_url(url);
855 d_http_cache->cache_response(url, now, *(rs->get_headers()), rs->get_stream());
856
857 return rs;
858 }
859 else { // url in cache
860 DBGN(cerr << "yes... ");
861
862 if (d_http_cache->is_url_valid(url)) { // url in cache and valid
863 DBGN(cerr << "and it's valid; using cached response." << endl);
864 HTTPCacheResponse *crs = new HTTPCacheResponse(s, 200, headers, file_name, d_http_cache);
865 return crs;
866 }
867 else { // url in cache but not valid; validate
868 DBGN(cerr << "but it's not valid; validating... ");
869
870 d_http_cache->release_cached_response(s); // This closes 's'
871 headers->clear();
872 vector<string> cond_hdrs = d_http_cache->get_conditional_request_headers(url);
873 FILE *body = 0;
874 string dods_temp = get_temp_file(body);
875 time_t now = time(0); // When was the request made (now).
876 long http_status;
877
878 try {
879 http_status = read_url(url, body, /*resp_hdrs*/headers, &cond_hdrs);
880 rewind(body);
881 }
882 catch (Error &e) {
883 close_temp(body, dods_temp);
884 delete headers;
885 throw ;
886 }
887
888 switch (http_status) {
889 case 200: { // New headers and new body
890 DBGN(cerr << "read a new response; caching." << endl);
891
892 d_http_cache->cache_response(url, now, /* *resp_hdrs*/*headers, body);
893 HTTPResponse *rs = new HTTPResponse(body, http_status, /*resp_hdrs*/headers, dods_temp);
894
895 return rs;
896 }
897
898 case 304: { // Just new headers, use cached body
899 DBGN(cerr << "cached response valid; updating." << endl);
900
901 close_temp(body, dods_temp);
902 d_http_cache->update_response(url, now, /* *resp_hdrs*/ *headers);
903 string file_name;
904 FILE *hs = d_http_cache->get_cached_response(url, *headers, file_name);
905 HTTPCacheResponse *crs = new HTTPCacheResponse(hs, 304, headers, file_name, d_http_cache);
906 return crs;
907 }
908
909 default: { // Oops.
910 close_temp(body, dods_temp);
911 if (http_status >= 400) {
912 delete headers; headers = 0;
913 string msg = "Error while reading the URL: ";
914 msg += url;
915 msg
916 += ".\nThe OPeNDAP server returned the following message:\n";
917 msg += http_status_to_string(http_status);
918 throw Error(msg);
919 }
920 else {
921 delete headers; headers = 0;
922 throw InternalErr(__FILE__, __LINE__,
923 "Bad response from the HTTP server: " + long_to_string(http_status));
924 }
925 }
926 }
927 }
928 }
929
930 throw InternalErr(__FILE__, __LINE__, "Should never get here");
931}
932
944HTTPResponse *
945HTTPConnect::plain_fetch_url(const string &url)
946{
947 DBG(cerr << "Getting URL: " << url << endl);
948 FILE *stream = 0;
949 string dods_temp = get_temp_file(stream);
950 vector<string> *resp_hdrs = new vector<string>;
951
952 int status = -1;
953 try {
954 status = read_url(url, stream, resp_hdrs); // Throws Error.
955 if (status >= 400) {
956 // delete resp_hdrs; resp_hdrs = 0;
957 string msg = "Error while reading the URL: ";
958 msg += url;
959 msg += ".\nThe OPeNDAP server returned the following message:\n";
960 msg += http_status_to_string(status);
961 throw Error(msg);
962 }
963 }
964
965 catch (Error &e) {
966 delete resp_hdrs;
967 close_temp(stream, dods_temp);
968 throw;
969 }
970
971#if 0
972 if (d_use_cpp_streams) {
973 fclose(stream);
974 fstream *in = new fstream(dods_temp.c_str(), ios::in|ios::binary);
975 return new HTTPResponse(in, status, resp_hdrs, dods_temp);
976 }
977 else {
978#endif
979 rewind(stream);
980 return new HTTPResponse(stream, status, resp_hdrs, dods_temp);
981#if 0
982}
983#endif
984}
985
997void
999{
1000 d_accept_deflate = deflate;
1001
1002 if (d_accept_deflate) {
1003 if (find(d_request_headers.begin(), d_request_headers.end(),
1004 "Accept-Encoding: deflate, gzip, compress") == d_request_headers.end())
1005 d_request_headers.push_back(string("Accept-Encoding: deflate, gzip, compress"));
1006 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1007 ostream_iterator<string>(cerr, "\n")));
1008 }
1009 else {
1010 vector<string>::iterator i;
1011 i = remove_if(d_request_headers.begin(), d_request_headers.end(),
1012 bind2nd(equal_to<string>(),
1013 string("Accept-Encoding: deflate, gzip, compress")));
1014 d_request_headers.erase(i, d_request_headers.end());
1015 }
1016}
1017
1026void
1028{
1029 // Look for, and remove if one exists, an XDAP-Accept header
1030 vector<string>::iterator i;
1031 i = find_if(d_request_headers.begin(), d_request_headers.end(),
1032 HeaderMatch("XDAP-Accept:"));
1033 if (i != d_request_headers.end())
1034 d_request_headers.erase(i);
1035
1036 // Record and add the new header value
1037 d_dap_client_protocol_major = major;
1038 d_dap_client_protocol_minor = minor;
1039 ostringstream xdap_accept;
1040 xdap_accept << "XDAP-Accept: " << major << "." << minor;
1041
1042 d_request_headers.push_back(xdap_accept.str());
1043
1044 DBG(copy(d_request_headers.begin(), d_request_headers.end(),
1045 ostream_iterator<string>(cerr, "\n")));
1046}
1047
1063void
1064HTTPConnect::set_credentials(const string &u, const string &p)
1065{
1066 if (u.empty())
1067 return;
1068
1069 // Store the credentials locally.
1070 d_username = u;
1071 d_password = p;
1072
1073 d_upstring = u + ":" + p;
1074}
1075
1076} // namespace libdap
A class for error processing.
Definition: Error.h:91
bool cache_response(const string &url, time_t request_time, const vector< string > &headers, const FILE *body)
Definition: HTTPCache.cc:1156
static HTTPCache * instance(const string &cache_root, bool force=false)
Definition: HTTPCache.cc:129
void set_expire_ignored(bool mode)
Definition: HTTPCache.cc:690
void set_default_expiration(int exp_time)
Definition: HTTPCache.cc:819
void release_cached_response(FILE *response)
Definition: HTTPCache.cc:1571
vector< string > get_conditional_request_headers(const string &url)
Definition: HTTPCache.cc:1249
void set_cache_enabled(bool mode)
Definition: HTTPCache.cc:635
void set_max_entry_size(unsigned long size)
Definition: HTTPCache.cc:772
bool is_url_valid(const string &url)
Definition: HTTPCache.cc:1388
void set_always_validate(bool validate)
Definition: HTTPCache.cc:841
void update_response(const string &url, time_t request_time, const vector< string > &headers)
Definition: HTTPCache.cc:1319
void set_max_size(unsigned long size)
Definition: HTTPCache.cc:724
FILE * get_cached_response(const string &url, vector< string > &headers, string &cacheName)
Definition: HTTPCache.cc:1480
void set_accept_deflate(bool defalte)
Definition: HTTPConnect.cc:998
HTTPResponse * fetch_url(const string &url)
Definition: HTTPConnect.cc:617
void set_credentials(const string &u, const string &p)
void set_xdap_protocol(int major, int minor)
A class for software fault reporting.
Definition: InternalErr.h:65
string get_proxy_server_host() const
Get the proxy host.
Definition: RCReader.h:181
int get_proxy_server_port() const
Get the proxy port.
Definition: RCReader.h:186
string get_proxy_server_userpw() const
Get the proxy username and password.
Definition: RCReader.h:191
string get_proxy_for_regexp() const
Definition: RCReader.h:215
bool is_proxy_for_used()
Definition: RCReader.h:210
ObjectType get_description_type(const string &value)
Definition: mime_util.cc:339
void parse_mime_header(const string &header, string &name, string &value)
Definition: mime_util.cc:898
string get_temp_file(FILE *&stream)
Definition: HTTPConnect.cc:769
void close_temp(FILE *s, const string &name)
Definition: HTTPConnect.cc:809
ObjectType
The type of object in the stream coming from the data server.
Definition: ObjectType.h:58