Leptonica 1.82.0
Image processing and image analysis suite
parseprotos.c
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
27/*
28 * \file parseprotos.c
29 * <pre>
30 *
31 * char *parseForProtos()
32 *
33 * Static helpers
34 * static l_int32 getNextNonCommentLine()
35 * static l_int32 getNextNonBlankLine()
36 * static l_int32 getNextNonDoubleSlashLine()
37 * static l_int32 searchForProtoSignature()
38 * static char *captureProtoSignature()
39 * static char *cleanProtoSignature()
40 * static l_int32 skipToEndOfFunction()
41 * static l_int32 skipToMatchingBrace()
42 * static l_int32 skipToSemicolon()
43 * static l_int32 getOffsetForCharacter()
44 * static l_int32 getOffsetForMatchingRP()
45 * </pre>
46 */
47
48#ifdef HAVE_CONFIG_H
49#include <config_auto.h>
50#endif /* HAVE_CONFIG_H */
51
52#include <string.h>
53#include "allheaders.h"
54
55#define L_BUF_SIZE 2048 /* max token size */
56
57static l_int32 getNextNonCommentLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
58static l_int32 getNextNonBlankLine(SARRAY *sa, l_int32 start, l_int32 *pnext);
59static l_int32 getNextNonDoubleSlashLine(SARRAY *sa, l_int32 start,
60 l_int32 *pnext);
61static l_int32 searchForProtoSignature(SARRAY *sa, l_int32 begin,
62 l_int32 *pstart, l_int32 *pstop, l_int32 *pcharindex,
63 l_int32 *pfound);
64static char * captureProtoSignature(SARRAY *sa, l_int32 start, l_int32 stop,
65 l_int32 charindex);
66static char * cleanProtoSignature(char *str);
67static l_int32 skipToEndOfFunction(SARRAY *sa, l_int32 start,
68 l_int32 charindex, l_int32 *pnext);
69static l_int32 skipToMatchingBrace(SARRAY *sa, l_int32 start,
70 l_int32 lbindex, l_int32 *prbline, l_int32 *prbindex);
71static l_int32 skipToSemicolon(SARRAY *sa, l_int32 start,
72 l_int32 charindex, l_int32 *pnext);
73static l_int32 getOffsetForCharacter(SARRAY *sa, l_int32 start, char tchar,
74 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
75static l_int32 getOffsetForMatchingRP(SARRAY *sa, l_int32 start,
76 l_int32 soffsetlp, l_int32 boffsetlp, l_int32 toffsetlp,
77 l_int32 *psoffset, l_int32 *pboffset, l_int32 *ptoffset);
78
79
80/*
81 * \brief parseForProtos()
82 *
83 * \param[in] filein output of cpp
84 * \param[in] prestring [optional] string that prefaces each decl;
85 * use NULL to omit
86 * \return parsestr string of function prototypes, or NULL on error
87 *
88 * <pre>
89 * Notes:
90 * (1) We parse the output of cpp:
91 * cpp -ansi <filein>
92 * Three plans were attempted, with success on the third.
93 * (2) Plan 1. A cursory examination of the cpp output indicated that
94 * every function was preceded by a cpp comment statement.
95 * So we just need to look at statements beginning after comments.
96 * Unfortunately, this is NOT the case. Some functions start
97 * without cpp comment lines, typically when there are no
98 * comments in the source that immediately precede the function.
99 * (3) Plan 2. Consider the keywords in the language that start
100 * parts of the cpp file. Some, like 'enum', 'union' and
101 * 'struct', are followed after a while by '{', and eventually
102 * end with '}', plus an optional token and a final ';'.
103 * Others, like 'extern', 'static' and 'typedef', are never
104 * the beginnings of global function definitions. Function
105 * prototypes have one or more sets of '(' followed eventually
106 * by a ')', and end with ';'. But function definitions have
107 * tokens, followed by '(', more tokens, ')' and then
108 * immediately a '{'. We would generate a prototype from this
109 * by adding a ';' to all tokens up to the ')'. So we use
110 * these special tokens to decide what we are parsing. And
111 * whenever a function definition is found and the prototype
112 * extracted, we skip through the rest of the function
113 * past the corresponding '}'. This token ends a line, and
114 * is often on a line of its own. But as it turns out,
115 * the only keyword we need to consider is 'static'.
116 * (4) Plan 3. Consider the parentheses and braces for various
117 * declarations. A struct, enum, or union has a pair of
118 * braces followed by a semicolon. With the exception of an
119 * __attribute__ declaration for a struct, they cannot have parentheses
120 * before the left brace, but a struct can have lots of parentheses
121 * within the brace set. A function prototype has no braces.
122 * A function definition can have sets of left and right
123 * parentheses, but these are followed by a left brace.
124 * So plan 3 looks at the way parentheses and braces are
125 * organized. Once the beginning of a function definition
126 * is found, the prototype is extracted and we search for
127 * the ending right brace.
128 * (5) To find the ending right brace, it is necessary to do some
129 * careful parsing. For example, in this file, we have
130 * left and right braces as characters, and these must not
131 * be counted. Somewhat more tricky, the file fhmtauto.c
132 * generates code, and includes a right brace in a string.
133 * So we must not include braces that are in strings. But how
134 * do we know if something is inside a string? Keep state,
135 * starting with not-inside, and every time you hit a double quote
136 * that is not escaped, toggle the condition. Any brace
137 * found in the state of being within a string is ignored.
138 * (6) When a prototype is extracted, it is put in a canonical
139 * form (i.e., cleaned up). Finally, we check that it is
140 * not static and save it. (If static, it is ignored).
141 * (7) The %prestring for unix is NULL; it is included here so that
142 * you can use Microsoft's declaration for importing or
143 * exporting to a dll. See environ.h for examples of use.
144 * Here, we set: %prestring = "LEPT_DLL ". Note in particular
145 * the space character that will separate 'LEPT_DLL' from
146 * the standard unix prototype that follows.
147 * </pre>
148 */
149char *
150parseForProtos(const char *filein,
151 const char *prestring)
152{
153char *strdata, *str, *newstr, *parsestr, *secondword;
154l_int32 start, next, stop, charindex, found;
155size_t nbytes;
156SARRAY *sa, *saout, *satest;
157
158 PROCNAME("parseForProtos");
159
160 if (!filein)
161 return (char *)ERROR_PTR("filein not defined", procName, NULL);
162
163 /* Read in the cpp output into memory, one string for each
164 * line in the file, omitting blank lines. */
165 strdata = (char *)l_binaryRead(filein, &nbytes);
166 sa = sarrayCreateLinesFromString(strdata, 0);
167
168 saout = sarrayCreate(0);
169 next = 0;
170 while (1) { /* repeat after each non-static prototype is extracted */
171 searchForProtoSignature(sa, next, &start, &stop, &charindex, &found);
172 if (!found)
173 break;
174/* lept_stderr(" start = %d, stop = %d, charindex = %d\n",
175 start, stop, charindex); */
176 str = captureProtoSignature(sa, start, stop, charindex);
177
178 /* Make sure that the signature found by cpp does not begin with
179 * static, extern or typedef. We get 'extern' declarations
180 * from header files, and with some versions of cpp running on
181 * #include <sys/stat.h> we get something of the form:
182 * extern ... (( ... )) ... ( ... ) { ...
183 * For this, the 1st '(' is the lp, the 2nd ')' is the rp,
184 * and there is a lot of garbage between the rp and the lp.
185 * It is easiest to simply reject any signature that starts
186 * with 'extern'. Note also that an 'extern' token has been
187 * prepended to each prototype, so the 'static' or
188 * 'extern' keywords we are looking for, if they exist,
189 * would be the second word. We also have a typedef in
190 * bmpio.c that has the form:
191 * typedef struct __attribute__((....)) { ...} ... ;
192 * This is avoided by blacklisting 'typedef' along with 'extern'
193 * and 'static'. */
194 satest = sarrayCreateWordsFromString(str);
195 secondword = sarrayGetString(satest, 1, L_NOCOPY);
196 if (strcmp(secondword, "static") && /* not static */
197 strcmp(secondword, "extern") && /* not extern */
198 strcmp(secondword, "typedef")) { /* not typedef */
199 if (prestring) { /* prepend it to the prototype */
200 newstr = stringJoin(prestring, str);
201 sarrayAddString(saout, newstr, L_INSERT);
202 LEPT_FREE(str);
203 } else {
204 sarrayAddString(saout, str, L_INSERT);
205 }
206 } else {
207 LEPT_FREE(str);
208 }
209 sarrayDestroy(&satest);
210
211 skipToEndOfFunction(sa, stop, charindex, &next);
212 if (next == -1) break;
213 }
214
215 /* Flatten into a string with newlines between prototypes */
216 parsestr = sarrayToString(saout, 1);
217 LEPT_FREE(strdata);
218 sarrayDestroy(&sa);
219 sarrayDestroy(&saout);
220
221 return parsestr;
222}
223
224
225/*
226 * \brief getNextNonCommentLine()
227 *
228 * \param[in] sa output from cpp, by line)
229 * \param[in] start starting index to search)
230 * \param[out] pnext index of first uncommented line after the start line
231 * \return 0 if OK, o on error
232 *
233 * <pre>
234 * Notes:
235 * (1) Skips over all consecutive comment lines, beginning at 'start'
236 * (2) If all lines to the end are '#' comments, return next = -1
237 * </pre>
238 */
239static l_int32
240getNextNonCommentLine(SARRAY *sa,
241 l_int32 start,
242 l_int32 *pnext)
243{
244char *str;
245l_int32 i, n;
246
247 PROCNAME("getNextNonCommentLine");
248
249 if (!sa)
250 return ERROR_INT("sa not defined", procName, 1);
251 if (!pnext)
252 return ERROR_INT("&pnext not defined", procName, 1);
253
254 /* Init for situation where this line and all following are comments */
255 *pnext = -1;
256
257 n = sarrayGetCount(sa);
258 for (i = start; i < n; i++) {
259 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
260 return ERROR_INT("str not returned; shouldn't happen", procName, 1);
261 if (str[0] != '#') {
262 *pnext = i;
263 return 0;
264 }
265 }
266
267 return 0;
268}
269
270
271/*
272 * \brief getNextNonBlankLine()
273 *
274 * \param[in] sa output from cpp, by line
275 * \param[in] start starting index to search
276 * \param[out] pnext index of first nonblank line after the start line
277 * \return 0 if OK, 1 on error
278 *
279 * <pre>
280 * Notes:
281 * (1) Skips over all consecutive blank lines, beginning at 'start'
282 * (2) A blank line has only whitespace characters (' ', '\t', '\n', '\r')
283 * (3) If all lines to the end are blank, return next = -1
284 * </pre>
285 */
286static l_int32
287getNextNonBlankLine(SARRAY *sa,
288 l_int32 start,
289 l_int32 *pnext)
290{
291char *str;
292l_int32 i, j, n, len;
293
294 PROCNAME("getNextNonBlankLine");
295
296 if (!sa)
297 return ERROR_INT("sa not defined", procName, 1);
298 if (!pnext)
299 return ERROR_INT("&pnext not defined", procName, 1);
300
301 /* Init for situation where this line and all following are blank */
302 *pnext = -1;
303
304 n = sarrayGetCount(sa);
305 for (i = start; i < n; i++) {
306 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
307 return ERROR_INT("str not returned; shouldn't happen", procName, 1);
308 len = strlen(str);
309 for (j = 0; j < len; j++) {
310 if (str[j] != ' ' && str[j] != '\t'
311 && str[j] != '\n' && str[j] != '\r') { /* non-blank */
312 *pnext = i;
313 return 0;
314 }
315 }
316 }
317
318 return 0;
319}
320
321
322/*
323 * \brief getNextNonDoubleSlashLine()
324 *
325 * \param[in] sa output from cpp, by line
326 * \param[in] start starting index to search
327 * \param[out] pnext index of first uncommented line after the start line
328 * \return 0 if OK, 1 on error
329 *
330 * <pre>
331 * Notes:
332 * (1) Skips over all consecutive '//' lines, beginning at 'start'
333 * (2) If all lines to the end start with '//', return next = -1
334 * </pre>
335 */
336static l_int32
337getNextNonDoubleSlashLine(SARRAY *sa,
338 l_int32 start,
339 l_int32 *pnext)
340{
341char *str;
342l_int32 i, n, len;
343
344 PROCNAME("getNextNonDoubleSlashLine");
345
346 if (!sa)
347 return ERROR_INT("sa not defined", procName, 1);
348 if (!pnext)
349 return ERROR_INT("&pnext not defined", procName, 1);
350
351 /* Init for situation where this line and all following
352 * start with '//' */
353 *pnext = -1;
354
355 n = sarrayGetCount(sa);
356 for (i = start; i < n; i++) {
357 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
358 return ERROR_INT("str not returned; shouldn't happen", procName, 1);
359 len = strlen(str);
360 if (len < 2 || str[0] != '/' || str[1] != '/') {
361 *pnext = i;
362 return 0;
363 }
364 }
365
366 return 0;
367}
368
369
370/*
371 * \brief searchForProtoSignature()
372 *
373 * \param[in] sa output from cpp, by line
374 * \param[in] begin beginning index to search
375 * \param[out] pstart starting index for function definition
376 * \param[out] pstop index of line on which proto is completed
377 * \param[out] pcharindex char index of completing ')' character
378 * \param[out] pfound 1 if valid signature is found; 0 otherwise
379 * \return 0 if OK, 1 on error
380 *
381 * <pre>
382 * Notes:
383 * (1) If this returns found == 0, it means that there are no
384 * more function definitions in the file. Caller must check
385 * this value and exit the loop over the entire cpp file.
386 * (2) This follows plan 3 (see above). We skip comment and blank
387 * lines at the beginning. Then we don't check for keywords.
388 * Instead, find the relative locations of the first occurrences
389 * of these four tokens: left parenthesis (lp), right
390 * parenthesis (rp), left brace (lb) and semicolon (sc).
391 * (3) The signature of a function definition looks like this:
392 * .... '(' .... ')' '{'
393 * where the lp and rp must both precede the lb, with only
394 * whitespace between the rp and the lb. The '....'
395 * are sets of tokens that have no braces.
396 * (4) If a function definition is found, this returns found = 1,
397 * with 'start' being the first line of the definition and
398 * 'charindex' being the position of the ')' in line 'stop'
399 * at the end of the arg list.
400 * </pre>
401 */
402static l_int32
403searchForProtoSignature(SARRAY *sa,
404 l_int32 begin,
405 l_int32 *pstart,
406 l_int32 *pstop,
407 l_int32 *pcharindex,
408 l_int32 *pfound)
409{
410l_int32 next, rbline, rbindex, scline;
411l_int32 soffsetlp, soffsetrp, soffsetlb, soffsetsc;
412l_int32 boffsetlp, boffsetrp, boffsetlb, boffsetsc;
413l_int32 toffsetlp, toffsetrp, toffsetlb, toffsetsc;
414
415 PROCNAME("searchForProtoSignature");
416
417 if (!sa)
418 return ERROR_INT("sa not defined", procName, 1);
419 if (!pstart)
420 return ERROR_INT("&start not defined", procName, 1);
421 if (!pstop)
422 return ERROR_INT("&stop not defined", procName, 1);
423 if (!pcharindex)
424 return ERROR_INT("&charindex not defined", procName, 1);
425 if (!pfound)
426 return ERROR_INT("&found not defined", procName, 1);
427
428 *pfound = FALSE;
429
430 while (1) {
431
432 /* Skip over sequential '#' comment lines */
433 getNextNonCommentLine(sa, begin, &next);
434 if (next == -1) return 0;
435 if (next != begin) {
436 begin = next;
437 continue;
438 }
439
440 /* Skip over sequential blank lines */
441 getNextNonBlankLine(sa, begin, &next);
442 if (next == -1) return 0;
443 if (next != begin) {
444 begin = next;
445 continue;
446 }
447
448 /* Skip over sequential lines starting with '//' */
449 getNextNonDoubleSlashLine(sa, begin, &next);
450 if (next == -1) return 0;
451 if (next != begin) {
452 begin = next;
453 continue;
454 }
455
456 /* Search for specific character sequence patterns; namely
457 * a lp, a matching rp, a lb and a semicolon.
458 * Abort the search if no lp is found. */
459 getOffsetForCharacter(sa, next, '(', &soffsetlp, &boffsetlp,
460 &toffsetlp);
461 if (soffsetlp == -1)
462 break;
463 getOffsetForMatchingRP(sa, next, soffsetlp, boffsetlp, toffsetlp,
464 &soffsetrp, &boffsetrp, &toffsetrp);
465 getOffsetForCharacter(sa, next, '{', &soffsetlb, &boffsetlb,
466 &toffsetlb);
467 getOffsetForCharacter(sa, next, ';', &soffsetsc, &boffsetsc,
468 &toffsetsc);
469
470 /* We've found a lp. Now weed out the case where a matching
471 * rp and a lb are not both found. */
472 if (soffsetrp == -1 || soffsetlb == -1)
473 break;
474
475 /* Check if a left brace occurs before a left parenthesis;
476 * if so, skip it */
477 if (toffsetlb < toffsetlp) {
478 skipToMatchingBrace(sa, next + soffsetlb, boffsetlb,
479 &rbline, &rbindex);
480 skipToSemicolon(sa, rbline, rbindex, &scline);
481 begin = scline + 1;
482 continue;
483 }
484
485 /* Check if a semicolon occurs before a left brace or
486 * a left parenthesis; if so, skip it */
487 if ((soffsetsc != -1) &&
488 (toffsetsc < toffsetlb || toffsetsc < toffsetlp)) {
489 skipToSemicolon(sa, next, 0, &scline);
490 begin = scline + 1;
491 continue;
492 }
493
494 /* OK, it should be a function definition. We haven't
495 * checked that there is only white space between the
496 * rp and lb, but we've only seen problems with two
497 * extern inlines in sys/stat.h, and this is handled
498 * later by eliminating any prototype beginning with 'extern'. */
499 *pstart = next;
500 *pstop = next + soffsetrp;
501 *pcharindex = boffsetrp;
502 *pfound = TRUE;
503 break;
504 }
505
506 return 0;
507}
508
509
510/*
511 * \brief captureProtoSignature()
512 *
513 * \param[in] sa output from cpp, by line
514 * \param[in] start starting index to search; never a comment line
515 * \param[in] stop index of line on which pattern is completed
516 * \param[in] charindex char index of completing ')' character
517 * \return cleanstr prototype string, or NULL on error
518 *
519 * <pre>
520 * Notes:
521 * (1) Return all characters, ending with a ';' after the ')'
522 * </pre>
523 */
524static char *
525captureProtoSignature(SARRAY *sa,
526 l_int32 start,
527 l_int32 stop,
528 l_int32 charindex)
529{
530char *str, *newstr, *protostr, *cleanstr;
531SARRAY *sap;
532l_int32 i;
533
534 PROCNAME("captureProtoSignature");
535
536 if (!sa)
537 return (char *)ERROR_PTR("sa not defined", procName, NULL);
538
539 sap = sarrayCreate(0);
540 for (i = start; i < stop; i++) {
541 str = sarrayGetString(sa, i, L_COPY);
542 sarrayAddString(sap, str, L_INSERT);
543 }
544 str = sarrayGetString(sa, stop, L_COPY);
545 str[charindex + 1] = '\0';
546 newstr = stringJoin(str, ";");
547 sarrayAddString(sap, newstr, L_INSERT);
548 LEPT_FREE(str);
549 protostr = sarrayToString(sap, 2);
550 sarrayDestroy(&sap);
551 cleanstr = cleanProtoSignature(protostr);
552 LEPT_FREE(protostr);
553
554 return cleanstr;
555}
556
557
558/*
559 * \brief cleanProtoSignature()
560 *
561 * \param[in] instr input prototype string
562 * \return cleanstr clean prototype string, or NULL on error
563 *
564 * <pre>
565 * Notes:
566 * (1) Adds 'extern' at beginning and regularizes spaces
567 * between tokens.
568 * </pre>
569 */
570static char *
571cleanProtoSignature(char *instr)
572{
573char *str, *cleanstr;
574char buf[L_BUF_SIZE];
575char externstring[] = "extern";
576l_int32 i, j, nwords, nchars, index, len;
577SARRAY *sa, *saout;
578
579 PROCNAME("cleanProtoSignature");
580
581 if (!instr)
582 return (char *)ERROR_PTR("instr not defined", procName, NULL);
583
584 sa = sarrayCreateWordsFromString(instr);
585 nwords = sarrayGetCount(sa);
586 saout = sarrayCreate(0);
587 sarrayAddString(saout, externstring, L_COPY);
588 for (i = 0; i < nwords; i++) {
589 str = sarrayGetString(sa, i, L_NOCOPY);
590 nchars = strlen(str);
591 index = 0;
592 for (j = 0; j < nchars; j++) {
593 if (index > L_BUF_SIZE - 6) {
594 sarrayDestroy(&sa);
595 sarrayDestroy(&saout);
596 return (char *)ERROR_PTR("token too large", procName, NULL);
597 }
598 if (str[j] == '(') {
599 buf[index++] = ' ';
600 buf[index++] = '(';
601 buf[index++] = ' ';
602 } else if (str[j] == ')') {
603 buf[index++] = ' ';
604 buf[index++] = ')';
605 } else {
606 buf[index++] = str[j];
607 }
608 }
609 buf[index] = '\0';
610 sarrayAddString(saout, buf, L_COPY);
611 }
612
613 /* Flatten to a prototype string with spaces added after
614 * each word, and remove the last space */
615 cleanstr = sarrayToString(saout, 2);
616 len = strlen(cleanstr);
617 cleanstr[len - 1] = '\0';
618
619 sarrayDestroy(&sa);
620 sarrayDestroy(&saout);
621 return cleanstr;
622}
623
624
625/*
626 * \brief skipToEndOfFunction()
627 *
628 * \param[in] sa output from cpp, by line
629 * \param[in] start index of starting line with left bracket to search
630 * \param[in] lbindex starting char index for left bracket
631 * \param[out] pnext index of line following the ending '}' for function
632 * \return 0 if OK, 1 on error
633 */
634static l_int32
635skipToEndOfFunction(SARRAY *sa,
636 l_int32 start,
637 l_int32 lbindex,
638 l_int32 *pnext)
639{
640l_int32 end, rbindex;
641l_int32 soffsetlb, boffsetlb, toffsetlb;
642
643 PROCNAME("skipToEndOfFunction");
644
645 if (!sa)
646 return ERROR_INT("sa not defined", procName, 1);
647 if (!pnext)
648 return ERROR_INT("&next not defined", procName, 1);
649
650 getOffsetForCharacter(sa, start, '{', &soffsetlb, &boffsetlb,
651 &toffsetlb);
652 skipToMatchingBrace(sa, start + soffsetlb, boffsetlb, &end, &rbindex);
653 if (end == -1) { /* shouldn't happen! */
654 *pnext = -1;
655 return 1;
656 }
657
658 *pnext = end + 1;
659 return 0;
660}
661
662
663/*
664 * \brief skipToMatchingBrace()
665 *
666 * \param[in] sa output from cpp, by line
667 * \param[in] start index of starting line with left bracket to search
668 * \param[in] lbindex starting char index for left bracket
669 * \param[out] pstop index of line with the matching right bracket
670 * \param[out] prbindex char index of matching right bracket
671 * \return 0 if OK, 1 on error
672 *
673 * <pre>
674 * Notes:
675 * (1) If the matching right brace is not found, returns
676 * stop = -1. This shouldn't happen.
677 * </pre>
678 */
679static l_int32
680skipToMatchingBrace(SARRAY *sa,
681 l_int32 start,
682 l_int32 lbindex,
683 l_int32 *pstop,
684 l_int32 *prbindex)
685{
686char *str;
687l_int32 i, j, jstart, n, sumbrace, found, instring, nchars;
688
689 PROCNAME("skipToMatchingBrace");
690
691 if (!sa)
692 return ERROR_INT("sa not defined", procName, 1);
693 if (!pstop)
694 return ERROR_INT("&stop not defined", procName, 1);
695 if (!prbindex)
696 return ERROR_INT("&rbindex not defined", procName, 1);
697
698 instring = 0; /* init to FALSE; toggle on double quotes */
699 *pstop = -1;
700 n = sarrayGetCount(sa);
701 sumbrace = 1;
702 found = FALSE;
703 for (i = start; i < n; i++) {
704 str = sarrayGetString(sa, i, L_NOCOPY);
705 jstart = 0;
706 if (i == start)
707 jstart = lbindex + 1;
708 nchars = strlen(str);
709 for (j = jstart; j < nchars; j++) {
710 /* Toggle the instring state every time you encounter
711 * a double quote that is NOT escaped. */
712 if (j == jstart && str[j] == '\"')
713 instring = 1 - instring;
714 if (j > jstart && str[j] == '\"' && str[j-1] != '\\')
715 instring = 1 - instring;
716 /* Record the braces if they are neither a literal character
717 * nor within a string. */
718 if (str[j] == '{' && str[j+1] != '\'' && !instring) {
719 sumbrace++;
720 } else if (str[j] == '}' && str[j+1] != '\'' && !instring) {
721 sumbrace--;
722 if (sumbrace == 0) {
723 found = TRUE;
724 *prbindex = j;
725 break;
726 }
727 }
728 }
729 if (found) {
730 *pstop = i;
731 return 0;
732 }
733 }
734
735 return ERROR_INT("matching right brace not found", procName, 1);
736}
737
738
739/*
740 * \brief skipToSemicolon()
741 *
742 * \param[in] sa output from cpp, by line
743 * \param[in] start index of starting line to search
744 * \param[in] charindex starting char index for search
745 * \param[out] pnext index of line containing the next ';'
746 * \return 0 if OK, 1 on error
747 *
748 * <pre>
749 * Notes:
750 * (1) If the semicolon isn't found, returns next = -1.
751 * This shouldn't happen.
752 * (2) This is only used in contexts where the semicolon is
753 * not within a string.
754 * </pre>
755 */
756static l_int32
757skipToSemicolon(SARRAY *sa,
758 l_int32 start,
759 l_int32 charindex,
760 l_int32 *pnext)
761{
762char *str;
763l_int32 i, j, n, jstart, nchars, found;
764
765 PROCNAME("skipToSemicolon");
766
767 if (!sa)
768 return ERROR_INT("sa not defined", procName, 1);
769 if (!pnext)
770 return ERROR_INT("&next not defined", procName, 1);
771
772 *pnext = -1;
773 n = sarrayGetCount(sa);
774 found = FALSE;
775 for (i = start; i < n; i++) {
776 str = sarrayGetString(sa, i, L_NOCOPY);
777 jstart = 0;
778 if (i == start)
779 jstart = charindex + 1;
780 nchars = strlen(str);
781 for (j = jstart; j < nchars; j++) {
782 if (str[j] == ';') {
783 found = TRUE;;
784 break;
785 }
786 }
787 if (found) {
788 *pnext = i;
789 return 0;
790 }
791 }
792
793 return ERROR_INT("semicolon not found", procName, 1);
794}
795
796
797/*
798 * \brief getOffsetForCharacter()
799 *
800 * \param[in] sa output from cpp, by line
801 * \param[in] start starting index in sa to search;
802 * never a comment line
803 * \param[in] tchar we are searching for the first instance of this
804 * \param[out] psoffset offset in strings from start index
805 * \param[out] pboffset offset in bytes within string in which
806 * the character is first found
807 * \param[out] ptoffset offset in total bytes from beginning of string
808 * indexed by 'start' to the location where
809 * the character is first found
810 * \return 0 if OK, 1 on error
811 *
812 * <pre>
813 * Notes:
814 * (1) We are searching for the first instance of 'tchar', starting
815 * at the beginning of the string indexed by start.
816 * (2) If the character is not found, soffset is returned as -1,
817 * and the other offsets are set to very large numbers. The
818 * caller must check the value of soffset.
819 * (3) This is only used in contexts where it is not necessary to
820 * consider if the character is inside a string.
821 * </pre>
822 */
823static l_int32
824getOffsetForCharacter(SARRAY *sa,
825 l_int32 start,
826 char tchar,
827 l_int32 *psoffset,
828 l_int32 *pboffset,
829 l_int32 *ptoffset)
830{
831char *str;
832l_int32 i, j, n, nchars, totchars, found;
833
834 PROCNAME("getOffsetForCharacter");
835
836 if (!sa)
837 return ERROR_INT("sa not defined", procName, 1);
838 if (!psoffset)
839 return ERROR_INT("&soffset not defined", procName, 1);
840 if (!pboffset)
841 return ERROR_INT("&boffset not defined", procName, 1);
842 if (!ptoffset)
843 return ERROR_INT("&toffset not defined", procName, 1);
844
845 *psoffset = -1; /* init to not found */
846 *pboffset = 100000000;
847 *ptoffset = 100000000;
848
849 n = sarrayGetCount(sa);
850 found = FALSE;
851 totchars = 0;
852 for (i = start; i < n; i++) {
853 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
854 return ERROR_INT("str not returned; shouldn't happen", procName, 1);
855 nchars = strlen(str);
856 for (j = 0; j < nchars; j++) {
857 if (str[j] == tchar) {
858 found = TRUE;
859 break;
860 }
861 }
862 if (found)
863 break;
864 totchars += nchars;
865 }
866
867 if (found) {
868 *psoffset = i - start;
869 *pboffset = j;
870 *ptoffset = totchars + j;
871 }
872
873 return 0;
874}
875
876
877/*
878 * \brief getOffsetForMatchingRP()
879 *
880 * \param[in] sa output from cpp, by line
881 * \param[in] start starting index in sa to search;
882 * never a comment line
883 * \param[in] soffsetlp string offset to first LP
884 * \param[in] boffsetlp byte offset within string to first LP
885 * \param[in] toffsetlp total byte offset to first LP
886 * \param[out] psoffset offset in strings from start index
887 * \param[out] pboffset offset in bytes within string in which
888 * the matching RP is found
889 * \param[out] ptoffset offset in total bytes from beginning of string
890 * indexed by 'start' to the location where
891 * the matching RP is found
892 * \return 0 if OK, 1 on error
893 *
894 * <pre>
895 * Notes:
896 * (1) We are searching for the matching right parenthesis (RP) that
897 * corresponds to the first LP found beginning at the string
898 * indexed by start.
899 * (2) If the matching RP is not found, soffset is returned as -1,
900 * and the other offsets are set to very large numbers. The
901 * caller must check the value of soffset.
902 * (3) This is only used in contexts where it is not necessary to
903 * consider if the character is inside a string.
904 * (4) We must do this because although most arg lists have a single
905 * left and right parenthesis, it is possible to construct
906 * more complicated prototype declarations, such as those
907 * where functions are passed in. The C++ rules for prototypes
908 * are strict, and require that for functions passed in as args,
909 * the function name arg be placed in parenthesis, as well
910 * as its arg list, thus incurring two extra levels of parentheses.
911 * </pre>
912 */
913static l_int32
914getOffsetForMatchingRP(SARRAY *sa,
915 l_int32 start,
916 l_int32 soffsetlp,
917 l_int32 boffsetlp,
918 l_int32 toffsetlp,
919 l_int32 *psoffset,
920 l_int32 *pboffset,
921 l_int32 *ptoffset)
922{
923char *str;
924l_int32 i, j, n, nchars, totchars, leftmatch, firstline, jstart, found;
925
926 PROCNAME("getOffsetForMatchingRP");
927
928 if (!sa)
929 return ERROR_INT("sa not defined", procName, 1);
930 if (!psoffset)
931 return ERROR_INT("&soffset not defined", procName, 1);
932 if (!pboffset)
933 return ERROR_INT("&boffset not defined", procName, 1);
934 if (!ptoffset)
935 return ERROR_INT("&toffset not defined", procName, 1);
936
937 *psoffset = -1; /* init to not found */
938 *pboffset = 100000000;
939 *ptoffset = 100000000;
940
941 n = sarrayGetCount(sa);
942 found = FALSE;
943 totchars = toffsetlp;
944 leftmatch = 1; /* count of (LP - RP); we're finished when it goes to 0. */
945 firstline = start + soffsetlp;
946 for (i = firstline; i < n; i++) {
947 if ((str = sarrayGetString(sa, i, L_NOCOPY)) == NULL)
948 return ERROR_INT("str not returned; shouldn't happen", procName, 1);
949 nchars = strlen(str);
950 jstart = 0;
951 if (i == firstline)
952 jstart = boffsetlp + 1;
953 for (j = jstart; j < nchars; j++) {
954 if (str[j] == '(')
955 leftmatch++;
956 else if (str[j] == ')')
957 leftmatch--;
958 if (leftmatch == 0) {
959 found = TRUE;
960 break;
961 }
962 }
963 if (found)
964 break;
965 if (i == firstline)
966 totchars += nchars - boffsetlp;
967 else
968 totchars += nchars;
969 }
970
971 if (found) {
972 *psoffset = i - start;
973 *pboffset = j;
974 *ptoffset = totchars + j;
975 }
976
977 return 0;
978}
#define L_BUF_SIZE
Definition: classapp.c:59
@ L_COPY
Definition: pix.h:712
@ L_NOCOPY
Definition: pix.h:710
@ L_INSERT
Definition: pix.h:711
SARRAY * sarrayCreate(l_int32 n)
sarrayCreate()
Definition: sarray1.c:170
char * sarrayToString(SARRAY *sa, l_int32 addnlflag)
sarrayToString()
Definition: sarray1.c:785
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
SARRAY * sarrayCreateLinesFromString(const char *string, l_int32 blankflag)
sarrayCreateLinesFromString()
Definition: sarray1.c:283
l_ok sarrayAddString(SARRAY *sa, const char *string, l_int32 copyflag)
sarrayAddString()
Definition: sarray1.c:451
SARRAY * sarrayCreateWordsFromString(const char *string)
sarrayCreateWordsFromString()
Definition: sarray1.c:233
Definition: array.h:127
l_uint8 * l_binaryRead(const char *filename, size_t *pnbytes)
l_binaryRead()
Definition: utils2.c:1352
char * stringJoin(const char *src1, const char *src2)
stringJoin()
Definition: utils2.c:518