libdap Updated for version 3.18.1
chunked_istream.cc
1// -*- mode: c++; c-basic-offset:4 -*-
2
3// This file is part of libdap, A C++ implementation of the OPeNDAP Data
4// Access Protocol.
5
6// Copyright (c) 2009 OPeNDAP, Inc.
7// Author: James Gallagher <jgallagher@opendap.org>
8//
9// This library is free software; you can redistribute it and/or
10// modify it under the terms of the GNU Lesser General Public
11// License as published by the Free Software Foundation; either
12// version 2.1 of the License, or (at your option) any later version.
13//
14// This library is distributed in the hope that it will be useful,
15// but WITHOUT ANY WARRANTY; without even the implied warranty of
16// MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
17// Lesser General Public License for more details.
18//
19// You should have received a copy of the GNU Lesser General Public
20// License along with this library; if not, write to the Free Software
21// Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
22//
23// You can contact OPeNDAP, Inc. at PO Box 112, Saunderstown, RI. 02874-0112.
24//
25// Portions of this code were taken verbatim from Josuttis,
26// "The C++ Standard Library," p.672
27
28#include "config.h"
29
30#include <stdint.h>
31#include <byteswap.h>
32#include <arpa/inet.h>
33
34#include <cstring>
35#include <vector>
36
37#include "chunked_stream.h"
38#include "chunked_istream.h"
39
40#include "Error.h"
41
42//#define DODS_DEBUG
43//#define DODS_DEBUG2
44#ifdef DODS_DEBUG
45#include <iostream>
46#endif
47
48#include "util.h"
49#include "debug.h"
50
51namespace libdap {
52
53/*
54 This code does not use a 'put back' buffer, but here's a picture of the
55 d_buffer pointer, eback(), gptr() and egptr() that can be used to see how
56 the I/O Stream library's streambuf class works. For the case with no
57 putback, just imagine it as zero and eliminate the leftmost extension. This
58 might also come in useful if the code was extended to support put back. I
59 removed that feature because I don't see it being used with our chunked
60 transmission protocol and it requires an extra call to memcpy() when data
61 are added to the internal buffer.
62
63 d_buffer  d_buffer + putBack
64 |         |
65 v         v
66 |---------|--------------------------------------------|....
67 |         |                                            |   .
68 |---------|--------------------------------------------|....
69 ^         ^                                            ^
70 |         |                                            |
71 eback()   gptr()                                       egptr()
72
73 */
74
84std::streambuf::int_type
86{
87 DBG(cerr << "underflow..." << endl);
88 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
89
90 // return the next character; uflow() increments the puffer pointer.
91 if (gptr() < egptr())
92 return traits_type::to_int_type(*gptr());
93
94 // gptr() == egptr() so read more data from the underlying input source.
95
96 // To read data from the chunked stream, first read the header
97 uint32_t header;
98 d_is.read((char *) &header, 4);
99#if !BYTE_ORDER_PREFIX
100 // When the endian nature of the server is encoded in the chunk header, the header is
101 // sent using network byte order
102 ntohl(header);
103#endif
104
105 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
106 // it holds data. In the latter case, bytes those will be read and moved into the
107 // buffer. Once those data are consumed, we'll be back here again and this read()
108 // will return EOF. See below for the other case...
109 if (d_is.eof()) return traits_type::eof();
110#if BYTE_ORDER_PREFIX
111 if (d_twiddle_bytes) header = bswap_32(header);
112#else
113 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
114 if (!d_set_twiddle) {
115 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
116 d_set_twiddle = true;
117 }
118#endif
119 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
120
121 DBG(cerr << "underflow: chunk size from header: " << chunk_size << endl);
122 DBG(cerr << "underflow: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
123 DBG(cerr << "underflow: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
124
125 // Handle the case where the buffer is not big enough to hold the incoming chunk
126 if (chunk_size > d_buf_size) {
127 d_buf_size = chunk_size;
128 m_buffer_alloc();
129 }
130
131 // If the END chunk has zero bytes, return EOF. See above for more information
132 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
133
134 // Read the chunk's data
135 d_is.read(d_buffer, chunk_size);
136 DBG2(cerr << "underflow: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
137 if (d_is.bad()) return traits_type::eof();
138
139 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
140 setg(d_buffer, // beginning of put back area
141 d_buffer, // read position (gptr() == eback())
142 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
143
144 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
145
146 switch (header & CHUNK_TYPE_MASK) {
147 case CHUNK_END:
148 DBG2(cerr << "Found end chunk" << endl);
149 return traits_type::to_int_type(*gptr());
150 case CHUNK_DATA:
151 return traits_type::to_int_type(*gptr());
152
153 case CHUNK_ERR:
154 // this is pretty much the end of the show... Assume the buffer/chunk holds
155 // the error message text.
156 d_error = true;
157 d_error_message = string(d_buffer, chunk_size);
158 return traits_type::eof();
159 default:
160 d_error = true;
161 d_error_message = "Failed to read known chunk header type.";
162 return traits_type::eof();
163 }
164
165 return traits_type::eof(); // Can never get here; this quiets g++
166}
167
184std::streamsize
185chunked_inbuf::xsgetn(char* s, std::streamsize num)
186{
187 DBG(cerr << "xsgetn... num: " << num << endl);
188
189 // if num is <= the chars currently in the buffer
190 if (num <= (egptr() - gptr())) {
191 memcpy(s, gptr(), num);
192 gbump(num);
193
194 return traits_type::not_eof(num);
195 }
196
197 // else they asked for more
198 uint32_t bytes_left_to_read = num;
199
200 // are there any bytes in the buffer? if so grab them first
201 if (gptr() < egptr()) {
202 int bytes_to_transfer = egptr() - gptr();
203 memcpy(s, gptr(), bytes_to_transfer);
204 gbump(bytes_to_transfer);
205 s += bytes_to_transfer;
206 bytes_left_to_read -= bytes_to_transfer;
207 }
208
209 // We need to get more bytes from the underlying stream; at this
210 // point the internal buffer is empty.
211
212 // read the remaining bytes to transfer, a chunk at a time,
213 // and put any leftover stuff in the buffer.
214
215 // note that when the code is here, gptr() == egptr(), so the
216 // next call to read() will fall through the previous tests and
217 // read at least one chunk here.
218 bool done = false;
219 while (!done) {
220 // Get a chunk header
221 uint32_t header;
222 d_is.read((char *) &header, 4);
223#if !BYTE_ORDER_PREFIX
224 ntohl(header);
225#endif
226
227 // There are two EOF cases: One where the END chunk is zero bytes and one where
228 // it holds data. In the latter case, those will be read and moved into the
229 // buffer. Once those data are consumed, we'll be back here again and this read()
230 // will return EOF. See below for the other case...
231 if (d_is.eof()) return traits_type::eof();
232#if BYTE_ORDER_PREFIX
233 if (d_twiddle_bytes) header = bswap_32(header);
234#else
235 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
236 if (!d_set_twiddle) {
237 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
238 d_set_twiddle = true;
239 }
240#endif
241
242 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
243 DBG(cerr << "xsgetn: chunk size from header: " << chunk_size << endl);
244 DBG(cerr << "xsgetn: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
245 DBG(cerr << "xsgetn: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
246
247 // handle error chunks here
248 if ((header & CHUNK_TYPE_MASK) == CHUNK_ERR) {
249 d_error = true;
250 // Note that d_buffer is not used to avoid calling resize if it is too
251 // small to hold the error message. At this point, there's not much reason
252 // to optimize transport efficiency, however.
253 std::vector<char> message(chunk_size);
254 d_is.read(&message[0], chunk_size);
255 d_error_message = string(&message[0], chunk_size);
256 // leave the buffer and gptr(), ..., in a consistent state (empty)
257 setg(d_buffer, d_buffer, d_buffer);
258 }
259 // And zero-length END chunks here.
260 else if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) {
261 return traits_type::not_eof(num-bytes_left_to_read);
262 }
263 // The next case is complicated because we read some data from the current
264 // chunk into 's' an some into the internal buffer.
265 else if (chunk_size > bytes_left_to_read) {
266 d_is.read(s, bytes_left_to_read);
267 if (d_is.bad()) return traits_type::eof();
268
269 // Now slurp up the remain part of the chunk and store it in the buffer
270 uint32_t bytes_leftover = chunk_size - bytes_left_to_read;
271 // expand the internal buffer if needed
272 if (bytes_leftover > d_buf_size) {
273 d_buf_size = chunk_size;
274 m_buffer_alloc();
275 }
276 // read the remain stuff in to d_buffer
277 d_is.read(d_buffer, bytes_leftover);
278 if (d_is.bad()) return traits_type::eof();
279
280 setg(d_buffer, // beginning of put back area
281 d_buffer, // read position (gptr() == eback())
282 d_buffer + bytes_leftover /*d_is.gcount()*/); // end of buffer (egptr())
283
284 bytes_left_to_read = 0 /* -= d_is.gcount()*/;
285 }
286 else {
287 // expand the internal buffer if needed
288 if (chunk_size > d_buf_size) {
289 d_buf_size = chunk_size;
290 m_buffer_alloc();
291 }
292 // If we get a chunk that's zero bytes, Don't call read()
293 // to save the kernel context switch overhead.
294 if (chunk_size > 0) {
295 d_is.read(s, chunk_size);
296 if (d_is.bad()) return traits_type::eof();
297 bytes_left_to_read -= chunk_size /*d_is.gcount()*/;
298 s += chunk_size;
299 }
300 }
301
302 switch (header & CHUNK_TYPE_MASK) {
303 case CHUNK_END:
304 DBG(cerr << "Found end chunk" << endl);
305 // in this case bytes_left_to_read can be > 0 because we ran out of data
306 // before reading all the requested bytes. The next read() call will return
307 // eof; this call returns the number of bytes read and transferred to 's'.
308 done = true;
309 break;
310 case CHUNK_DATA:
311 done = bytes_left_to_read == 0;
312 break;
313 case CHUNK_ERR:
314 // this is pretty much the end of the show... The error message has
315 // already been read above
316 return traits_type::eof();
317 break;
318 default:
319 d_error = true;
320 d_error_message = "Failed to read known chunk header type.";
321 return traits_type::eof();
322 }
323 }
324
325 return traits_type::not_eof(num-bytes_left_to_read);
326}
327
340std::streambuf::int_type
342{
343 // To read data from the chunked stream, first read the header
344 uint32_t header;
345 d_is.read((char *) &header, 4);
346#if !BYTE_ORDER_PREFIX
347 ntohl(header);
348#endif
349
350 // There are two 'EOF' cases: One where the END chunk is zero bytes and one where
351 // it holds data. In the latter case, bytes those will be read and moved into the
352 // buffer. Once those data are consumed, we'll be back here again and this read()
353 // will return EOF. See below for the other case...
354 if (d_is.eof()) return traits_type::eof();
355#if BYTE_ORDER_PREFIX
356 if (d_twiddle_bytes) header = bswap_32(header);
357#else
358 // (header & CHUNK_LITTLE_ENDIAN) --> is the sender little endian
359 if (!d_set_twiddle) {
360 d_twiddle_bytes = (is_host_big_endian() == (header & CHUNK_LITTLE_ENDIAN));
361 d_set_twiddle = true;
362 }
363#endif
364
365 uint32_t chunk_size = header & CHUNK_SIZE_MASK;
366
367 DBG(cerr << "read_next_chunk: chunk size from header: " << chunk_size << endl);
368 DBG(cerr << "read_next_chunk: chunk type from header: " << hex << (header & CHUNK_TYPE_MASK) << endl);
369 DBG(cerr << "read_next_chunk: chunk byte order from header: " << hex << (header & CHUNK_BIG_ENDIAN) << endl);
370
371 // Handle the case where the buffer is not big enough to hold the incoming chunk
372 if (chunk_size > d_buf_size) {
373 d_buf_size = chunk_size;
374 m_buffer_alloc();
375 }
376
377 // If the END chunk has zero bytes, return EOF. See above for more information
378 if (chunk_size == 0 && (header & CHUNK_TYPE_MASK) == CHUNK_END) return traits_type::eof();
379
380 // Read the chunk's data
381 d_is.read(d_buffer, chunk_size);
382 DBG2(cerr << "read_next_chunk: size read: " << d_is.gcount() << ", eof: " << d_is.eof() << ", bad: " << d_is.bad() << endl);
383 if (d_is.bad()) return traits_type::eof();
384
385 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
386 setg(d_buffer, // beginning of put back area
387 d_buffer, // read position (gptr() == eback())
388 d_buffer + chunk_size); // end of buffer (egptr()) chunk_size == d_is.gcount() unless there's an error
389
390 DBG2(cerr << "eback(): " << (void*)eback() << ", gptr(): " << (void*)(gptr()-eback()) << ", egptr(): " << (void*)(egptr()-eback()) << endl);
391
392 switch (header & CHUNK_TYPE_MASK) {
393 case CHUNK_END:
394 DBG(cerr << "Found end chunk" << endl);
395 return traits_type::not_eof(chunk_size);
396 case CHUNK_DATA:
397 return traits_type::not_eof(chunk_size);
398
399 case CHUNK_ERR:
400 // this is pretty much the end of the show... Assume the buffer/chunk holds
401 // the error message text.
402 d_error = true;
403 d_error_message = string(d_buffer, chunk_size);
404 return traits_type::eof();
405 default:
406 d_error = true;
407 d_error_message = "Failed to read known chunk header type.";
408 return traits_type::eof();
409 }
410
411 return traits_type::eof(); // Can never get here; this quiets g++
412}
413
414}
virtual int_type underflow()
Insert new characters into the buffer. This specialization of underflow is called when the gptr() is a...
int_type read_next_chunk()
Read a chunk. Normally the chunked nature of a chunked_istream/chunked_inbuf is hidden from the caller...
virtual std::streamsize xsgetn(char *s, std::streamsize num)
Read a block of data. This specialization of xsgetn() reads num bytes and puts them in s first reading...
bool is_host_big_endian()
Does this host use big-endian byte order?
Definition: util.cc:93