Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
pdfio2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
105#ifdef HAVE_CONFIG_H
106#include <config_auto.h>
107#endif /* HAVE_CONFIG_H */
108
109#include <string.h>
110#include <math.h>
111#include "allheaders.h"
112
113/* --------------------------------------------*/
114#if USE_PDFIO /* defined in environ.h */
115 /* --------------------------------------------*/
116
117 /* Typical scan resolution in ppi (pixels/inch) */
    /* Used as the fallback when neither the caller nor the image
     * supplies a positive resolution. */
118static const l_int32 DefaultInputRes = 300;
119
120 /* Static helpers */
    /* Generators of compressed image data (L_COMP_DATA), either
     * directly from a file or from an in-memory pix. */
121static L_COMP_DATA *l_generateJp2kData(const char *fname);
122static L_COMP_DATA *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);
123static L_COMP_DATA *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,
124 l_int32 quality);
125static L_COMP_DATA *pixGenerateJp2kData(PIX *pixs, l_int32 quality);
126static L_COMP_DATA *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);
127
    /* Builders for the pieces of a pdf that is assembled from the
     * image data accumulated in an L_PDF_DATA. */
128static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes,
129 L_PDF_DATA *lpd);
130static void generateFixedStringsPdf(L_PDF_DATA *lpd);
131static char *generateEscapeString(const char *str);
132static void generateMediaboxPdf(L_PDF_DATA *lpd);
133static l_int32 generatePageStringPdf(L_PDF_DATA *lpd);
134static l_int32 generateContentStringPdf(L_PDF_DATA *lpd);
135static l_int32 generatePreXStringsPdf(L_PDF_DATA *lpd);
136static l_int32 generateColormapStringsPdf(L_PDF_DATA *lpd);
137static void generateTrailerPdf(L_PDF_DATA *lpd);
138static char *makeTrailerStringPdf(L_DNA *daloc);
139static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,
140 L_PDF_DATA *lpd);
141
    /* Helpers used when concatenating several single-page pdfs */
142static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);
143static char *generatePagesObjStringPdf(NUMA *napage);
144static L_BYTEA *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);
145
    /* Lifetime and accessors for the L_PDF_DATA accumulator */
146static L_PDF_DATA *pdfdataCreate(const char *title);
147static void pdfdataDestroy(L_PDF_DATA **plpd);
148static L_COMP_DATA *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);
149
150
151/* ---------------- Defaults for rendering options ----------------- */
152 /* Output G4 as writing through image mask; this is the default */
153static l_int32 var_WRITE_G4_IMAGE_MASK = 1;
154 /* Write date/time and lib version into pdf; this is the default */
155static l_int32 var_WRITE_DATE_AND_VERSION = 1;
156
    /* Sizes of scratch character buffers */
157#define L_SMALLBUF 256
158#define L_BIGBUF 2048 /* must be able to hold hex colormap */
159
160
    /* When DEBUG_MULTIPAGE is nonzero, the pdf concatenation routine
     * dumps the object mapping, the page object numbers and the
     * Pages object to stderr. */
161#ifndef NO_CONSOLE_IO
162#define DEBUG_MULTIPAGE 0
163#endif /* ~NO_CONSOLE_IO */
164
165
166/*---------------------------------------------------------------------*
167 * Intermediate function for generating multipage pdf output *
168 *---------------------------------------------------------------------*/
200l_ok
202 l_int32 type,
203 l_int32 quality,
204 l_uint8 **pdata,
205 size_t *pnbytes,
206 l_int32 x,
207 l_int32 y,
208 l_int32 res,
209 const char *title,
210 L_PDF_DATA **plpd,
211 l_int32 position)
212{
213l_int32 pixres, w, h, ret;
214l_float32 xpt, ypt, wpt, hpt;
215L_COMP_DATA *cid = NULL;
216L_PDF_DATA *lpd = NULL;
217
218 if (!pdata)
219 return ERROR_INT("&data not defined", __func__, 1);
220 *pdata = NULL;
221 if (!pnbytes)
222 return ERROR_INT("&nbytes not defined", __func__, 1);
223 *pnbytes = 0;
224 if (!pix)
225 return ERROR_INT("pix not defined", __func__, 1);
226 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
227 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
228 selectDefaultPdfEncoding(pix, &type);
229 }
230 if (quality < 0 || quality > 100)
231 return ERROR_INT("invalid quality", __func__, 1);
232
233 if (plpd) { /* part of multi-page invocation */
234 if (position == L_FIRST_IMAGE)
235 *plpd = NULL;
236 }
237
238 /* Generate the compressed image data. It must NOT
239 * be ascii85 encoded. */
240 pixGenerateCIData(pix, type, quality, 0, &cid);
241 if (!cid)
242 return ERROR_INT("cid not made", __func__, 1);
243
244 /* Get media box in pts. Guess the input image resolution
245 * based on the input parameter %res, the resolution data in
246 * the pix, and the size of the image. */
247 pixres = cid->res;
248 w = cid->w;
249 h = cid->h;
250 if (res <= 0.0)
251 res = (pixres > 0) ? pixres : DefaultInputRes;
252 xpt = x * 72.f / res;
253 ypt = y * 72.f / res;
254 wpt = w * 72.f / res;
255 hpt = h * 72.f / res;
256
257 /* Set up lpd */
258 if (!plpd) { /* single image */
259 if ((lpd = pdfdataCreate(title)) == NULL)
260 return ERROR_INT("lpd not made", __func__, 1);
261 } else if (position == L_FIRST_IMAGE) { /* first of multiple images */
262 if ((lpd = pdfdataCreate(title)) == NULL)
263 return ERROR_INT("lpd not made", __func__, 1);
264 *plpd = lpd;
265 } else { /* not the first of multiple images */
266 lpd = *plpd;
267 }
268
269 /* Add the data to the lpd */
270 ptraAdd(lpd->cida, cid);
271 lpd->n++;
272 ptaAddPt(lpd->xy, xpt, ypt);
273 ptaAddPt(lpd->wh, wpt, hpt);
274
275 /* If a single image or the last of multiple images,
276 * generate the pdf and destroy the lpd */
277 if (!plpd || (position == L_LAST_IMAGE)) {
278 ret = l_generatePdf(pdata, pnbytes, lpd);
279 pdfdataDestroy(&lpd);
280 if (plpd) *plpd = NULL;
281 if (ret)
282 return ERROR_INT("pdf output not made", __func__, 1);
283 }
284
285 return 0;
286}
287
288
289/*---------------------------------------------------------------------*
290 * Intermediate function for generating multipage pdf output *
291 *---------------------------------------------------------------------*/
328l_ok
330 SARRAY *sa,
331 l_uint8 **pdata,
332 size_t *pnbytes)
333{
334char *fname, *str_pages, *str_trailer;
335l_uint8 *pdfdata, *data;
336l_int32 i, j, index, nobj, npages;
337l_int32 *sizes, *locs;
338size_t size;
339L_BYTEA *bas, *bad, *bat1, *bat2;
340L_DNA *da_locs, *da_sizes, *da_outlocs, *da;
341L_DNAA *daa_locs; /* object locations on each page */
342NUMA *na_objs, *napage;
343NUMAA *naa_objs; /* object mapping numbers to new values */
344
345 if (!pdata)
346 return ERROR_INT("&data not defined", __func__, 1);
347 *pdata = NULL;
348 if (!pnbytes)
349 return ERROR_INT("&nbytes not defined", __func__, 1);
350 *pnbytes = 0;
351 if (!pa_data)
352 return ERROR_INT("pa_data not defined", __func__, 1);
353
354 /* Parse the files and find the object locations.
355 * Remove file data that cannot be parsed. */
356 ptraGetActualCount(pa_data, &npages);
357 daa_locs = l_dnaaCreate(npages);
358 for (i = 0; i < npages; i++) {
359 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
360 if (parseTrailerPdf(bas, &da_locs) != 0) {
361 bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
362 l_byteaDestroy(&bas);
363 if (sa) {
364 fname = sarrayGetString(sa, i, L_NOCOPY);
365 L_ERROR("can't parse file %s; skipping\n", __func__, fname);
366 } else {
367 L_ERROR("can't parse file %d; skipping\n", __func__, i);
368 }
369 } else {
370 l_dnaaAddDna(daa_locs, da_locs, L_INSERT);
371 }
372 }
373
374 /* Recompute npages in case some of the files were not pdf */
375 ptraCompactArray(pa_data);
376 ptraGetActualCount(pa_data, &npages);
377 if (npages == 0) {
378 l_dnaaDestroy(&daa_locs);
379 return ERROR_INT("no parsable pdf files found", __func__, 1);
380 }
381
382 /* Find the mapping from initial to final object numbers */
383 naa_objs = numaaCreate(npages); /* stores final object numbers */
384 napage = numaCreate(npages); /* stores "Page" object numbers */
385 index = 0;
386 for (i = 0; i < npages; i++) {
387 da = l_dnaaGetDna(daa_locs, i, L_CLONE);
388 nobj = l_dnaGetCount(da);
389 if (i == 0) {
390 numaAddNumber(napage, 4); /* object 4 on first page */
391 na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
392 index = nobj - 1;
393 } else { /* skip the first 3 objects in each file */
394 numaAddNumber(napage, index); /* Page object is first we add */
395 na_objs = numaMakeConstant(0.0, nobj - 1);
396 numaReplaceNumber(na_objs, 3, 3); /* refers to parent of all */
397 for (j = 4; j < nobj - 1; j++)
398 numaSetValue(na_objs, j, index++);
399 }
400 numaaAddNuma(naa_objs, na_objs, L_INSERT);
401 l_dnaDestroy(&da);
402 }
403
404 /* Make the Pages object (#3) */
405 str_pages = generatePagesObjStringPdf(napage);
406
407 /* Build the output */
408 bad = l_byteaCreate(5000);
409 da_outlocs = l_dnaCreate(0); /* locations of all output objects */
410 for (i = 0; i < npages; i++) {
411 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
412 pdfdata = l_byteaGetData(bas, &size);
413 da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE); /* locs on this page */
414 na_objs = numaaGetNuma(naa_objs, i, L_CLONE); /* obj # on this page */
415 nobj = l_dnaGetCount(da_locs) - 1;
416 da_sizes = l_dnaDiffAdjValues(da_locs); /* object sizes on this page */
417 sizes = l_dnaGetIArray(da_sizes);
418 locs = l_dnaGetIArray(da_locs);
419 if (i == 0) {
420 l_byteaAppendData(bad, pdfdata, sizes[0]);
421 l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
422 l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
423 l_byteaAppendString(bad, str_pages);
424 for (j = 0; j < 4; j++)
425 l_dnaAddNumber(da_outlocs, locs[j]);
426 }
427 for (j = 4; j < nobj; j++) {
428 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
429 bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
430 bat2 = substituteObjectNumbers(bat1, na_objs);
431 data = l_byteaGetData(bat2, &size);
432 l_byteaAppendData(bad, data, size);
433 l_byteaDestroy(&bat1);
434 l_byteaDestroy(&bat2);
435 }
436 if (i == npages - 1) /* last one */
437 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
438 LEPT_FREE(sizes);
439 LEPT_FREE(locs);
440 l_dnaDestroy(&da_locs);
441 numaDestroy(&na_objs);
442 l_dnaDestroy(&da_sizes);
443 }
444
445 /* Add the trailer */
446 str_trailer = makeTrailerStringPdf(da_outlocs);
447 l_byteaAppendString(bad, str_trailer);
448
449 /* Transfer the output data */
450 *pdata = l_byteaCopyData(bad, pnbytes);
451 l_byteaDestroy(&bad);
452
453#if DEBUG_MULTIPAGE
454 lept_stderr("******** object mapper **********");
455 numaaWriteStream(stderr, naa_objs);
456
457 lept_stderr("******** Page object numbers ***********");
458 numaWriteStderr(napage);
459
460 lept_stderr("******** Pages object ***********\n");
461 lept_stderr("%s\n", str_pages);
462#endif /* DEBUG_MULTIPAGE */
463
464 numaDestroy(&napage);
465 numaaDestroy(&naa_objs);
466 l_dnaDestroy(&da_outlocs);
467 l_dnaaDestroy(&daa_locs);
468 LEPT_FREE(str_pages);
469 LEPT_FREE(str_trailer);
470 return 0;
471}
472
473
474/*---------------------------------------------------------------------*
475 * Convert tiff multipage to pdf file *
476 *---------------------------------------------------------------------*/
490l_ok
491convertTiffMultipageToPdf(const char *filein,
492 const char *fileout)
493{
494l_int32 istiff;
495PIXA *pixa;
496FILE *fp;
497
498 if ((fp = fopenReadStream(filein)) == NULL)
499 return ERROR_INT_1("file not found", filein, __func__, 1);
500 istiff = fileFormatIsTiff(fp);
501 fclose(fp);
502 if (!istiff)
503 return ERROR_INT_1("file not tiff format", filein, __func__, 1);
504
505 pixa = pixaReadMultipageTiff(filein);
506 pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);
507 pixaDestroy(&pixa);
508 return 0;
509}
510
511
512/*---------------------------------------------------------------------*
513 * CID-based operations *
514 *---------------------------------------------------------------------*/
542l_ok
543l_generateCIDataForPdf(const char *fname,
544 PIX *pix,
545 l_int32 quality,
546 L_COMP_DATA **pcid)
547{
548l_int32 format, type;
549L_COMP_DATA *cid;
550PIX *pixt;
551
552 if (!pcid)
553 return ERROR_INT("&cid not defined", __func__, 1);
554 *pcid = cid = NULL;
555 if (!fname && !pix)
556 return ERROR_INT("neither fname nor pix are defined", __func__, 1);
557
558 /* If a compressed file is given that is not 'stdin', see if we
559 * can generate the pdf output without transcoding. */
560 if (fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0) {
561 findFileFormat(fname, &format);
562 if (format == IFF_UNKNOWN)
563 L_WARNING("file %s format is unknown\n", __func__, fname);
564 if (format == IFF_PS || format == IFF_LPDF) {
565 L_ERROR("file %s is unsupported format %d\n",
566 __func__, fname, format);
567 return 1;
568 }
569 if (format == IFF_JFIF_JPEG) {
570 cid = l_generateJpegData(fname, 0);
571 } else if (format == IFF_JP2) {
572 cid = l_generateJp2kData(fname);
573 } else if (format == IFF_PNG) {
574 cid = l_generateFlateDataPdf(fname, pix);
575 }
576 }
577
578 /* Otherwise, use the pix to generate the pdf output */
579 if (!cid) {
580 if (!pix)
581 pixt = pixRead(fname);
582 else
583 pixt = pixClone(pix);
584 if (!pixt)
585 return ERROR_INT("pixt not made", __func__, 1);
586 if (selectDefaultPdfEncoding(pixt, &type)) {
587 pixDestroy(&pixt);
588 return 1;
589 }
590 pixGenerateCIData(pixt, type, quality, 0, &cid);
591 pixDestroy(&pixt);
592 if (!cid)
593 return ERROR_INT("cid not made from pix", __func__, 1);
594 }
595 *pcid = cid;
596 return 0;
597}
598
599
624l_ok
625l_generateCIData(const char *fname,
626 l_int32 type,
627 l_int32 quality,
628 l_int32 ascii85,
629 L_COMP_DATA **pcid)
630{
631l_int32 format, d, bps, spp, iscmap;
632L_COMP_DATA *cid;
633PIX *pix;
634
635 if (!pcid)
636 return ERROR_INT("&cid not defined", __func__, 1);
637 *pcid = NULL;
638 if (!fname)
639 return ERROR_INT("fname not defined", __func__, 1);
640 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
641 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)
642 return ERROR_INT("invalid conversion type", __func__, 1);
643 if (ascii85 != 0 && ascii85 != 1)
644 return ERROR_INT("invalid ascii85", __func__, 1);
645
646 /* Sanity check on requested encoding */
647 pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);
648 d = bps * spp;
649 if (d == 24) d = 32;
650 if (iscmap && type != L_FLATE_ENCODE) {
651 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
652 type = L_FLATE_ENCODE;
653 } else if (d < 8 && type == L_JPEG_ENCODE) {
654 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
655 type = L_FLATE_ENCODE;
656 } else if (d < 8 && type == L_JP2K_ENCODE) {
657 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
658 type = L_FLATE_ENCODE;
659 } else if (d > 1 && type == L_G4_ENCODE) {
660 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
661 type = L_FLATE_ENCODE;
662 }
663
664 if (type == L_JPEG_ENCODE) {
665 if (format == IFF_JFIF_JPEG) { /* do not transcode */
666 cid = l_generateJpegData(fname, ascii85);
667 } else {
668 if ((pix = pixRead(fname)) == NULL)
669 return ERROR_INT("pix not returned for JPEG", __func__, 1);
670 cid = pixGenerateJpegData(pix, ascii85, quality);
671 pixDestroy(&pix);
672 }
673 if (!cid)
674 return ERROR_INT("jpeg data not made", __func__, 1);
675 } else if (type == L_JP2K_ENCODE) {
676 if (format == IFF_JP2) { /* do not transcode */
677 cid = l_generateJp2kData(fname);
678 } else {
679 if ((pix = pixRead(fname)) == NULL)
680 return ERROR_INT("pix not returned for JP2K", __func__, 1);
681 cid = pixGenerateJp2kData(pix, quality);
682 pixDestroy(&pix);
683 }
684 if (!cid)
685 return ERROR_INT("jp2k data not made", __func__, 1);
686 } else if (type == L_G4_ENCODE) {
687 if ((pix = pixRead(fname)) == NULL)
688 return ERROR_INT("pix not returned for G4", __func__, 1);
689 cid = pixGenerateG4Data(pix, ascii85);
690 pixDestroy(&pix);
691 if (!cid)
692 return ERROR_INT("g4 data not made", __func__, 1);
693 } else if (type == L_FLATE_ENCODE) {
694 if ((cid = l_generateFlateData(fname, ascii85)) == NULL)
695 return ERROR_INT("flate data not made", __func__, 1);
696 } else {
697 return ERROR_INT("invalid conversion type", __func__, 1);
698 }
699 *pcid = cid;
700
701 return 0;
702}
703
704
705/*---------------------------------------------------------------------*
706 * Low-level CID-based operations *
707 *---------------------------------------------------------------------*/
727l_generateFlateDataPdf(const char *fname,
728 PIX *pixs)
729{
730l_uint8 *pngcomp = NULL; /* entire PNG compressed file */
731l_uint8 *datacomp = NULL; /* gzipped raster data */
732l_uint8 *cmapdata = NULL; /* uncompressed colormap */
733char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
734l_uint32 i, j, n;
735l_int32 format, interlaced;
736l_int32 ncolors; /* in colormap */
737l_int32 bps; /* bits/sample: usually 8 */
738l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */
739l_int32 w, h, cmapflag;
740l_int32 xres, yres;
741size_t nbytescomp = 0, nbytespng = 0;
742FILE *fp;
743L_COMP_DATA *cid;
744PIX *pix;
745PIXCMAP *cmap = NULL;
746
747 if (!fname)
748 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
749
750 findFileFormat(fname, &format);
751 spp = 0; /* init to spp != 4 if not png */
752 interlaced = 0; /* initialize to no interlacing */
753 bps = 0; /* initialize to a nonsense value */
754 if (format == IFF_PNG) {
755 isPngInterlaced(fname, &interlaced);
756 if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
757 return (L_COMP_DATA *)ERROR_PTR("bad png input", __func__, NULL);
758 }
759
760 /* PDF is capable of inlining some types of PNG files, but not all
761 of them. We need to transcode anything with interlacing, an
762 alpha channel, or 1 bpp (which would otherwise be photo-inverted).
763
764 Note: any PNG image file with an alpha channel is converted on
765 reading to RGBA (spp == 4). This includes the (gray + alpha) format
766 with spp == 2. Because of the conversion, readHeaderPng() gives
767 spp = 2, whereas pixGetSpp() gives spp = 4 on the converted pix. */
768 if (format != IFF_PNG ||
769 (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
770 { /* lgtm+ analyzer needed the logic expanded */
771 if (!pixs)
772 pix = pixRead(fname);
773 else
774 pix = pixClone(pixs);
775 if (!pix)
776 return (L_COMP_DATA *)ERROR_PTR("pix not made", __func__, NULL);
777 cid = pixGenerateFlateData(pix, 0);
778 pixDestroy(&pix);
779 return cid;
780 }
781
782 /* It's png. Generate the pdf data without transcoding.
783 * Implementation by Jeff Breidenbach.
784 * First, read the metadata */
785 if ((fp = fopenReadStream(fname)) == NULL)
786 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
787 fname, __func__, NULL);
788 freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
789 fgetPngResolution(fp, &xres, &yres);
790 fclose(fp);
791
792 /* We get pdf corruption when inlining the data from 16 bpp png. */
793 if (bps == 16)
794 return l_generateFlateData(fname, 0);
795
796 /* Read the entire png file */
797 if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
798 return (L_COMP_DATA *)ERROR_PTR_1("unable to read file",
799 fname, __func__, NULL);
800
801 /* Extract flate data, copying portions of it to memory, including
802 * the predictor information in a byte at the beginning of each
803 * raster line. The flate data makes up the vast majority of
804 * the png file, so after extraction we expect datacomp to
805 * be nearly full (i.e., nbytescomp will be only slightly less
806 * than nbytespng). Also extract the colormap if present. */
807 if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
808 LEPT_FREE(pngcomp);
809 return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory",
810 __func__, NULL);
811 }
812
813 /* Parse the png file. Each chunk consists of:
814 * length: 4 bytes
815 * name: 4 bytes (e.g., "IDAT")
816 * data: n bytes
817 * CRC: 4 bytes
818 * Start at the beginning of the data section of the first chunk,
819 * byte 16, because the png file begins with 8 bytes of header,
820 * followed by the first 8 bytes of the first chunk
821 * (length and name). On each loop, increment by 12 bytes to
822 * skip over the CRC, length and name of the next chunk. */
823 for (i = 16; i < nbytespng; i += 12) { /* do each successive chunk */
824 /* Get the chunk length */
825 n = pngcomp[i - 8] << 24;
826 n += pngcomp[i - 7] << 16;
827 n += pngcomp[i - 6] << 8;
828 n += pngcomp[i - 5] << 0;
829 if (n >= nbytespng - i) { /* "n + i" can overflow */
830 LEPT_FREE(pngcomp);
831 LEPT_FREE(datacomp);
832 pixcmapDestroy(&cmap);
833 L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
834 i, n, nbytespng);
835 return NULL;
836 }
837
838 /* Is it a data chunk? */
839 if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) {
840 memcpy(datacomp + nbytescomp, pngcomp + i, n);
841 nbytescomp += n;
842 }
843
844 /* Is it a palette chunk? */
845 if (cmapflag && !cmap &&
846 memcmp(pngcomp + i - 4, "PLTE", 4) == 0) {
847 if ((n / 3) > (1 << bps)) {
848 LEPT_FREE(pngcomp);
849 LEPT_FREE(datacomp);
850 pixcmapDestroy(&cmap);
851 L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n",
852 __func__, i, n, (1 << bps));
853 return NULL;
854 }
855 cmap = pixcmapCreate(bps);
856 for (j = i; j < i + n; j += 3) {
857 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
858 pngcomp[j + 2]);
859 }
860 }
861 i += n; /* move to the end of the data chunk */
862 }
863 LEPT_FREE(pngcomp);
864
865 if (nbytescomp == 0) {
866 LEPT_FREE(datacomp);
867 pixcmapDestroy(&cmap);
868 return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", __func__, NULL);
869 }
870
871 /* Extract and encode the colormap data as hexascii */
872 ncolors = 0;
873 if (cmap) {
874 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
875 pixcmapDestroy(&cmap);
876 if (!cmapdata) {
877 LEPT_FREE(datacomp);
878 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
879 __func__, NULL);
880 }
881 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
882 LEPT_FREE(cmapdata);
883 }
884
885 /* Note that this is the only situation where the predictor
886 * field of the CID is set to 1. Adobe's predictor values on
887 * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and
888 * 10-14 for inline predictors, the specifics of which are
889 * ignored by the pdf interpreter, which just needs to know that
890 * the first byte on each compressed scanline is some predictor
891 * whose type can be inferred from the byte itself. */
892 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
893 cid->datacomp = datacomp;
894 cid->type = L_FLATE_ENCODE;
895 cid->cmapdatahex = cmapdatahex;
896 cid->nbytescomp = nbytescomp;
897 cid->ncolors = ncolors;
898 cid->predictor = TRUE;
899 cid->w = w;
900 cid->h = h;
901 cid->bps = bps;
902 cid->spp = spp;
903 cid->res = xres;
904 return cid;
905}
906
907
925l_generateJpegData(const char *fname,
926 l_int32 ascii85flag)
927{
928char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
929l_uint8 *data = NULL;
930l_int32 w, h, xres, yres, bps, spp;
931size_t nbytes, nbytes85;
932L_COMP_DATA *cid;
933FILE *fp;
934
935 if (!fname)
936 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
937
938 if (ascii85flag != 0 && ascii85flag != 1)
939 return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL);
940
941 /* Read the metadata */
942 if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
943 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
944 bps = 8;
945 if ((fp = fopenReadStream(fname)) == NULL)
946 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
947 fname, __func__, NULL);
948 fgetJpegResolution(fp, &xres, &yres);
949 fclose(fp);
950
951 /* Read the entire jpeg file. The returned jpeg data in memory
952 * starts with ffd8 and ends with ffd9 */
953 if ((data = l_binaryRead(fname, &nbytes)) == NULL)
954 return (L_COMP_DATA *)ERROR_PTR_1("data not extracted",
955 fname, __func__, NULL);
956
957 /* Optionally, encode the compressed data */
958 if (ascii85flag == 1) {
959 data85 = encodeAscii85(data, nbytes, &nbytes85);
960 LEPT_FREE(data);
961 if (!data85)
962 return (L_COMP_DATA *)ERROR_PTR_1("data85 not made",
963 fname, __func__, NULL);
964 else
965 data85[nbytes85 - 1] = '\0'; /* remove the newline */
966 }
967
968 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
969 if (ascii85flag == 0) {
970 cid->datacomp = data;
971 } else { /* ascii85 */
972 cid->data85 = data85;
973 cid->nbytes85 = nbytes85;
974 }
975 cid->type = L_JPEG_ENCODE;
976 cid->nbytescomp = nbytes;
977 cid->w = w;
978 cid->h = h;
979 cid->bps = bps;
980 cid->spp = spp;
981 cid->res = xres;
982 return cid;
983}
984
985
1003 size_t nbytes,
1004 l_int32 ascii85flag)
1005{
1006char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
1007l_int32 w, h, xres, yres, bps, spp;
1008size_t nbytes85;
1009L_COMP_DATA *cid;
1010
1011 if (!data)
1012 return (L_COMP_DATA *)ERROR_PTR("data not defined", __func__, NULL);
1013
1014 /* Read the metadata */
1015 if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
1016 LEPT_FREE(data);
1017 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
1018 }
1019 bps = 8;
1020 readResolutionMemJpeg(data, nbytes, &xres, &yres);
1021
1022 /* Optionally, encode the compressed data */
1023 if (ascii85flag == 1) {
1024 data85 = encodeAscii85(data, nbytes, &nbytes85);
1025 LEPT_FREE(data);
1026 if (!data85)
1027 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1028 else
1029 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1030 }
1031
1032 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1033 if (ascii85flag == 0) {
1034 cid->datacomp = data;
1035 } else { /* ascii85 */
1036 cid->data85 = data85;
1037 cid->nbytes85 = nbytes85;
1038 }
1039 cid->type = L_JPEG_ENCODE;
1040 cid->nbytescomp = nbytes;
1041 cid->w = w;
1042 cid->h = h;
1043 cid->bps = bps;
1044 cid->spp = spp;
1045 cid->res = xres;
1046 return cid;
1047}
1048
1049
1061static L_COMP_DATA *
1062l_generateJp2kData(const char *fname)
1063{
1064l_int32 w, h, bps, spp, xres, yres;
1065size_t nbytes;
1066L_COMP_DATA *cid;
1067FILE *fp;
1068
1069 if (!fname)
1070 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1071
1072 if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
1073 return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", __func__, NULL);
1074
1075 /* The returned jp2k data in memory is the entire jp2k file */
1076 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1077 if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
1078 l_CIDataDestroy(&cid);
1079 return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL);
1080 }
1081
1082 xres = yres = 0;
1083 if ((fp = fopenReadStream(fname)) != NULL) {
1084 fgetJp2kResolution(fp, &xres, &yres);
1085 fclose(fp);
1086 }
1087 cid->type = L_JP2K_ENCODE;
1088 cid->nbytescomp = nbytes;
1089 cid->w = w;
1090 cid->h = h;
1091 cid->bps = bps;
1092 cid->spp = spp;
1093 cid->res = xres;
1094 return cid;
1095}
1096
1097
1114l_generateG4Data(const char *fname,
1115 l_int32 ascii85flag)
1116{
1117l_uint8 *datacomp = NULL; /* g4 compressed raster data */
1118char *data85 = NULL; /* ascii85 encoded g4 compressed data */
1119l_int32 w, h, xres, yres, npages;
1120l_int32 minisblack; /* TRUE or FALSE */
1121size_t nbytes85, nbytescomp;
1122L_COMP_DATA *cid;
1123FILE *fp;
1124
1125 if (!fname)
1126 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1127
1128 /* Make sure this is a single page tiff file */
1129 if ((fp = fopenReadStream(fname)) == NULL)
1130 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
1131 fname, __func__, NULL);
1132 tiffGetCount(fp, &npages);
1133 fclose(fp);
1134 if (npages != 1) {
1135 L_ERROR(" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname);
1136 return NULL;
1137 }
1138
1139 /* Read the resolution */
1140 if ((fp = fopenReadStream(fname)) == NULL)
1141 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
1142 fname, __func__, NULL);
1143 getTiffResolution(fp, &xres, &yres);
1144 fclose(fp);
1145
1146 /* The returned ccitt g4 data in memory is the block of
1147 * bytes in the tiff file, starting after 8 bytes and
1148 * ending before the directory. */
1149 if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
1150 &w, &h, &minisblack)) {
1151 return (L_COMP_DATA *)ERROR_PTR_1("datacomp not extracted",
1152 fname, __func__, NULL);
1153 }
1154
1155 /* Optionally, encode the compressed data */
1156 if (ascii85flag == 1) {
1157 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1158 LEPT_FREE(datacomp);
1159 if (!data85)
1160 return (L_COMP_DATA *)ERROR_PTR_1("data85 not made",
1161 fname, __func__, NULL);
1162 else
1163 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1164 }
1165
1166 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1167 if (ascii85flag == 0) {
1168 cid->datacomp = datacomp;
1169 } else { /* ascii85 */
1170 cid->data85 = data85;
1171 cid->nbytes85 = nbytes85;
1172 }
1173 cid->type = L_G4_ENCODE;
1174 cid->nbytescomp = nbytescomp;
1175 cid->w = w;
1176 cid->h = h;
1177 cid->bps = 1;
1178 cid->spp = 1;
1179 cid->minisblack = minisblack;
1180 cid->res = xres;
1181 return cid;
1182}
1183
1184
1205l_ok
1207 l_int32 type,
1208 l_int32 quality,
1209 l_int32 ascii85,
1210 L_COMP_DATA **pcid)
1211{
1212l_int32 w, h, d, maxAsp;
1213PIXCMAP *cmap;
1214
1215 if (!pcid)
1216 return ERROR_INT("&cid not defined", __func__, 1);
1217 *pcid = NULL;
1218 if (!pixs)
1219 return ERROR_INT("pixs not defined", __func__, 1);
1220 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1221 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1222 selectDefaultPdfEncoding(pixs, &type);
1223 }
1224 if (ascii85 != 0 && ascii85 != 1)
1225 return ERROR_INT("invalid ascii85", __func__, 1);
1226 pixGetDimensions(pixs, &w, &h, NULL);
1227 if (w == 0 || h == 0)
1228 return ERROR_INT("invalid w or h", __func__, 1);
1229 maxAsp = L_MAX(w / h, h / w);
1230 if (maxAsp > 10)
1231 return ERROR_INT("max asperity > 10", __func__, 1);
1232
1233 /* Conditionally modify the encoding type if libz is
1234 * available and the requested library is missing. */
1235#if defined(HAVE_LIBZ)
1236# if !defined(HAVE_LIBJPEG)
1237 if (type == L_JPEG_ENCODE) {
1238 L_WARNING("no libjpeg; using flate encoding\n", __func__);
1239 type = L_FLATE_ENCODE;
1240 }
1241# endif /* !defined(HAVE_LIBJPEG) */
1242# if !defined(HAVE_LIBJP2K)
1243 if (type == L_JP2K_ENCODE) {
1244 L_WARNING("no libjp2k; using flate encoding\n", __func__);
1245 type = L_FLATE_ENCODE;
1246 }
1247# endif /* !defined(HAVE_LIBJP2K) */
1248# if !defined(HAVE_LIBTIFF)
1249 if (type == L_G4_ENCODE) {
1250 L_WARNING("no libtiff; using flate encoding\n", __func__);
1251 type = L_FLATE_ENCODE;
1252 }
1253# endif /* !defined(HAVE_LIBTIFF) */
1254#endif /* defined(HAVE_LIBZ) */
1255
1256 /* Sanity check on requested encoding */
1257 d = pixGetDepth(pixs);
1258 cmap = pixGetColormap(pixs);
1259 if (cmap && type != L_FLATE_ENCODE) {
1260 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
1261 type = L_FLATE_ENCODE;
1262 } else if (d < 8 && (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) {
1263 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
1264 type = L_FLATE_ENCODE;
1265 } else if (d > 1 && type == L_G4_ENCODE) {
1266 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
1267 type = L_FLATE_ENCODE;
1268 }
1269
1270 if (type == L_JPEG_ENCODE) {
1271 if ((*pcid = pixGenerateJpegData(pixs, ascii85, quality)) == NULL)
1272 return ERROR_INT("jpeg data not made", __func__, 1);
1273 } else if (type == L_JP2K_ENCODE) {
1274 if ((*pcid = pixGenerateJp2kData(pixs, quality)) == NULL)
1275 return ERROR_INT("jp2k data not made", __func__, 1);
1276 } else if (type == L_G4_ENCODE) {
1277 if ((*pcid = pixGenerateG4Data(pixs, ascii85)) == NULL)
1278 return ERROR_INT("g4 data not made", __func__, 1);
1279 } else { /* type == L_FLATE_ENCODE */
1280 if ((*pcid = pixGenerateFlateData(pixs, ascii85)) == NULL)
1281 return ERROR_INT("flate data not made", __func__, 1);
1282 }
1283 return 0;
1284}
1285
1286
1308l_generateFlateData(const char *fname,
1309 l_int32 ascii85flag)
1310{
1311L_COMP_DATA *cid;
1312PIX *pixs;
1313
1314 if (!fname)
1315 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1316
1317 if ((pixs = pixRead(fname)) == NULL)
1318 return (L_COMP_DATA *)ERROR_PTR("pixs not made", __func__, NULL);
1319 cid = pixGenerateFlateData(pixs, ascii85flag);
1320 pixDestroy(&pixs);
1321 return cid;
1322}
1323
1324
1342static L_COMP_DATA *
1344 l_int32 ascii85flag)
1345{
1346l_uint8 *data = NULL; /* uncompressed raster data in required format */
1347l_uint8 *datacomp = NULL; /* gzipped raster data */
1348char *data85 = NULL; /* ascii85 encoded gzipped raster data */
1349l_uint8 *cmapdata = NULL; /* uncompressed colormap */
1350char *cmapdata85 = NULL; /* ascii85 encoded uncompressed colormap */
1351char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
1352l_int32 ncolors; /* in colormap; not used if cmapdata85 is null */
1353l_int32 bps; /* bits/sample: usually 8 */
1354l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb */
1355l_int32 w, h, d, cmapflag;
1356size_t ncmapbytes85 = 0;
1357size_t nbytes85 = 0;
1358size_t nbytes, nbytescomp;
1359L_COMP_DATA *cid;
1360PIX *pixt;
1361PIXCMAP *cmap;
1362
1363 if (!pixs)
1364 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1365
1366 /* Convert the image to one of these 4 types:
1367 * 1 bpp
1368 * 8 bpp, no colormap
1369 * 8 bpp, colormap
1370 * 32 bpp rgb */
1371 pixGetDimensions(pixs, &w, &h, &d);
1372 cmap = pixGetColormap(pixs);
1373 cmapflag = (cmap) ? 1 : 0;
1374 if (d == 2 || d == 4 || d == 16) {
1375 pixt = pixConvertTo8(pixs, cmapflag);
1376 cmap = pixGetColormap(pixt);
1377 d = pixGetDepth(pixt);
1378 } else if (d == 32 && pixGetSpp(pixs) == 4) { /* remove alpha */
1379 pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
1380 } else {
1381 pixt = pixClone(pixs);
1382 }
1383 if (!pixt)
1384 return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL);
1385 spp = (d == 32) ? 3 : 1;
1386 bps = (d == 32) ? 8 : d;
1387
1388 /* Extract and encode the colormap data as both ascii85 and hexascii */
1389 ncolors = 0;
1390 if (cmap) {
1391 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
1392 if (!cmapdata) {
1393 pixDestroy(&pixt);
1394 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
1395 __func__, NULL);
1396 }
1397
1398 cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
1399 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
1400 LEPT_FREE(cmapdata);
1401 }
1402
1403 /* Extract and compress the raster data */
1404 pixGetRasterData(pixt, &data, &nbytes);
1405 pixDestroy(&pixt);
1406 if (!data) {
1407 LEPT_FREE(cmapdata85);
1408 LEPT_FREE(cmapdatahex);
1409 return (L_COMP_DATA *)ERROR_PTR("data not returned", __func__, NULL);
1410 }
1411 datacomp = zlibCompress(data, nbytes, &nbytescomp);
1412 LEPT_FREE(data);
1413 if (!datacomp) {
1414 LEPT_FREE(cmapdata85);
1415 LEPT_FREE(cmapdatahex);
1416 return (L_COMP_DATA *)ERROR_PTR("datacomp not made", __func__, NULL);
1417 }
1418
1419 /* Optionally, encode the compressed data */
1420 if (ascii85flag == 1) {
1421 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1422 LEPT_FREE(datacomp);
1423 if (!data85) {
1424 LEPT_FREE(cmapdata85);
1425 LEPT_FREE(cmapdatahex);
1426 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1427 } else {
1428 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1429 }
1430 }
1431
1432 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1433 if (ascii85flag == 0) {
1434 cid->datacomp = datacomp;
1435 } else { /* ascii85 */
1436 cid->data85 = data85;
1437 cid->nbytes85 = nbytes85;
1438 }
1439 cid->type = L_FLATE_ENCODE;
1440 cid->cmapdatahex = cmapdatahex;
1441 cid->cmapdata85 = cmapdata85;
1442 cid->nbytescomp = nbytescomp;
1443 cid->ncolors = ncolors;
1444 cid->w = w;
1445 cid->h = h;
1446 cid->bps = bps;
1447 cid->spp = spp;
1448 cid->res = pixGetXRes(pixs);
1449 cid->nbytes = nbytes; /* only for debugging */
1450 return cid;
1451}
1452
1453
1470static L_COMP_DATA *
1472 l_int32 ascii85flag,
1473 l_int32 quality)
1474{
1475l_int32 d;
1476char *fname;
1477L_COMP_DATA *cid;
1478
1479 if (!pixs)
1480 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1481 if (pixGetColormap(pixs))
1482 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1483 d = pixGetDepth(pixs);
1484 if (d != 8 && d != 16 && d != 32)
1485 return (L_COMP_DATA *)ERROR_PTR("pixs not 8, 16 or 32 bpp",
1486 __func__, NULL);
1487
1488 /* Compress to a temp jpeg file */
1489 fname = l_makeTempFilename();
1490 if (pixWriteJpeg(fname, pixs, quality, 0)) {
1491 LEPT_FREE(fname);
1492 return NULL;
1493 }
1494
1495 /* Generate the data */
1496 cid = l_generateJpegData(fname, ascii85flag);
1497 if (lept_rmfile(fname) != 0)
1498 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1499 LEPT_FREE(fname);
1500 return cid;
1501}
1502
1503
1518static L_COMP_DATA *
1520 l_int32 quality)
1521{
1522l_int32 d;
1523char *fname;
1524L_COMP_DATA *cid;
1525
1526 if (!pixs)
1527 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1528 if (pixGetColormap(pixs))
1529 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1530 d = pixGetDepth(pixs);
1531 if (d != 8 && d != 32)
1532 return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
1533
1534 /* Compress to a temp jp2k file */
1535 fname = l_makeTempFilename();
1536 if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
1537 LEPT_FREE(fname);
1538 return NULL;
1539 }
1540
1541 /* Generate the data */
1542 cid = l_generateJp2kData(fname);
1543 if (lept_rmfile(fname) != 0)
1544 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1545 LEPT_FREE(fname);
1546 return cid;
1547}
1548
1549
1564static L_COMP_DATA *
1566 l_int32 ascii85flag)
1567{
1568char *fname;
1569L_COMP_DATA *cid;
1570
1571 if (!pixs)
1572 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1573 if (pixGetDepth(pixs) != 1)
1574 return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", __func__, NULL);
1575 if (pixGetColormap(pixs))
1576 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1577
1578 /* Compress to a temp tiff g4 file */
1579 fname = l_makeTempFilename();
1580 if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
1581 LEPT_FREE(fname);
1582 return NULL;
1583 }
1584
1585 cid = l_generateG4Data(fname, ascii85flag);
1586 if (lept_rmfile(fname) != 0)
1587 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1588 LEPT_FREE(fname);
1589 return cid;
1590}
1591
1592
1608l_ok
1610 const char *title,
1611 l_uint8 **pdata,
1612 size_t *pnbytes)
1613{
1614l_int32 res, ret;
1615l_float32 wpt, hpt;
1616L_PDF_DATA *lpd = NULL;
1617
1618 if (!pdata || !pnbytes)
1619 return ERROR_INT("&data and &nbytes not both defined", __func__, 1);
1620 *pdata = NULL;
1621 *pnbytes = 0;
1622 if (!cid)
1623 return ERROR_INT("cid not defined", __func__, 1);
1624
1625 /* Get media box parameters, in pts */
1626 res = cid->res;
1627 if (res <= 0)
1628 res = DefaultInputRes;
1629 wpt = cid->w * 72.f / res;
1630 hpt = cid->h * 72.f / res;
1631
1632 /* Set up the pdf data struct (lpd) */
1633 if ((lpd = pdfdataCreate(title)) == NULL)
1634 return ERROR_INT("lpd not made", __func__, 1);
1635 ptraAdd(lpd->cida, cid);
1636 lpd->n++;
1637 ptaAddPt(lpd->xy, 0, 0); /* xpt = ypt = 0 */
1638 ptaAddPt(lpd->wh, wpt, hpt);
1639
1640 /* Generate the pdf string and destroy the lpd */
1641 ret = l_generatePdf(pdata, pnbytes, lpd);
1642 pdfdataDestroy(&lpd);
1643 if (ret)
1644 return ERROR_INT("pdf output not made", __func__, 1);
1645 return 0;
1646}
1647
1648
1655void
1657{
1658L_COMP_DATA *cid;
1659
1660 if (pcid == NULL) {
1661 L_WARNING("ptr address is null!\n", __func__);
1662 return;
1663 }
1664 if ((cid = *pcid) == NULL)
1665 return;
1666
1667 if (cid->datacomp) LEPT_FREE(cid->datacomp);
1668 if (cid->data85) LEPT_FREE(cid->data85);
1669 if (cid->cmapdata85) LEPT_FREE(cid->cmapdata85);
1670 if (cid->cmapdatahex) LEPT_FREE(cid->cmapdatahex);
1671 LEPT_FREE(cid);
1672 *pcid = NULL;
1673}
1674
1675
1676/*---------------------------------------------------------------------*
1677 * Helper functions for generating the output pdf string *
1678 *---------------------------------------------------------------------*/
1700static l_int32
1701l_generatePdf(l_uint8 **pdata,
1702 size_t *pnbytes,
1703 L_PDF_DATA *lpd)
1704{
1705 if (!pdata)
1706 return ERROR_INT("&data not defined", __func__, 1);
1707 *pdata = NULL;
1708 if (!pnbytes)
1709 return ERROR_INT("&nbytes not defined", __func__, 1);
1710 *pnbytes = 0;
1711 if (!lpd)
1712 return ERROR_INT("lpd not defined", __func__, 1);
1713
1714 generateFixedStringsPdf(lpd);
1715 generateMediaboxPdf(lpd);
1716 generatePageStringPdf(lpd);
1717 generateContentStringPdf(lpd);
1718 generatePreXStringsPdf(lpd);
1719 generateColormapStringsPdf(lpd);
1720 generateTrailerPdf(lpd);
1721 return generateOutputDataPdf(pdata, pnbytes, lpd);
1722}
1723
1724
1725static void
1726generateFixedStringsPdf(L_PDF_DATA *lpd)
1727{
1728char buf[L_SMALLBUF];
1729char *version, *datestr;
1730SARRAY *sa;
1731
1732 /* Accumulate data for the header and objects 1-3 */
1733 lpd->id = stringNew("%PDF-1.5\n");
1734 l_dnaAddNumber(lpd->objsize, strlen(lpd->id));
1735
1736 lpd->obj1 = stringNew("1 0 obj\n"
1737 "<<\n"
1738 "/Type /Catalog\n"
1739 "/Pages 3 0 R\n"
1740 ">>\n"
1741 "endobj\n");
1742 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));
1743
1744 sa = sarrayCreate(0);
1745 sarrayAddString(sa, "2 0 obj\n"
1746 "<<\n", L_COPY);
1747 if (var_WRITE_DATE_AND_VERSION) {
1748 datestr = l_getFormattedDate();
1749 snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);
1750 sarrayAddString(sa, buf, L_COPY);
1751 LEPT_FREE(datestr);
1752 version = getLeptonicaVersion();
1753 snprintf(buf, sizeof(buf),
1754 "/Producer (leptonica: %s)\n", version);
1755 LEPT_FREE(version);
1756 } else {
1757 snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");
1758 }
1759 sarrayAddString(sa, buf, L_COPY);
1760 if (lpd->title) {
1761 char *hexstr;
1762 if ((hexstr = generateEscapeString(lpd->title)) != NULL) {
1763 snprintf(buf, sizeof(buf), "/Title %s\n", hexstr);
1764 sarrayAddString(sa, buf, L_COPY);
1765 } else {
1766 L_ERROR("title string is not ascii\n", __func__);
1767 }
1768 LEPT_FREE(hexstr);
1769 }
1770 sarrayAddString(sa, ">>\n"
1771 "endobj\n", L_COPY);
1772 lpd->obj2 = sarrayToString(sa, 0);
1773 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));
1774 sarrayDestroy(&sa);
1775
1776 lpd->obj3 = stringNew("3 0 obj\n"
1777 "<<\n"
1778 "/Type /Pages\n"
1779 "/Kids [ 4 0 R ]\n"
1780 "/Count 1\n"
1781 ">>\n");
1782 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));
1783
1784 /* Do the post-datastream string */
1785 lpd->poststream = stringNew("\n"
1786 "endstream\n"
1787 "endobj\n");
1788}
1789
1790
/*!
 * \brief   generateEscapeString()
 *
 * \param[in]   str   input string; must be pure 7-bit ascii
 * \return  hex escape string, or NULL on null or non-ascii input
 *
 * <pre>
 * Notes:
 *      (1) The output is "<feff" followed by four hex digits per
 *          input character and a closing ">".
 *      (2) The ascii test is done on the unsigned byte value, so it
 *          does not depend on whether plain char is signed.
 *      (3) The caller owns the returned string.
 * </pre>
 */
static char *
generateEscapeString(const char  *str)
{
char    *buffer;
size_t   i, nchar, buflen;

    if (!str)
        return (char *)ERROR_PTR("str not defined", __func__, NULL);
    nchar = strlen(str);
    for (i = 0; i < nchar; i++) {
        if ((unsigned char)str[i] > 127)
            return (char *)ERROR_PTR("str not all ascii", __func__, NULL);
    }

        /* Needs 5 + 4 * nchar + 1 chars plus the terminating nul */
    buflen = 4 * nchar + 10;
    if ((buffer = (char *)LEPT_CALLOC(buflen, sizeof(char))) == NULL)
        return (char *)ERROR_PTR("buffer not made", __func__, NULL);
    memcpy(buffer, "<feff", 5);
    for (i = 0; i < nchar; i++) {
            /* Writes 4 hex digits and a nul that the next write
             * (or the closing '>') overwrites */
        snprintf(buffer + 5 + 4 * i, 5, "%04x",
                 (unsigned int)(unsigned char)str[i]);
    }
    buffer[5 + 4 * nchar] = '>';
    return buffer;
}
1833
1834
1835static void
1836generateMediaboxPdf(L_PDF_DATA *lpd)
1837{
1838l_int32 i;
1839l_float32 xpt, ypt, wpt, hpt, maxx, maxy;
1840
1841 /* First get the full extent of all the images.
1842 * This is the mediabox, in pts. */
1843 maxx = maxy = 0;
1844 for (i = 0; i < lpd->n; i++) {
1845 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1846 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1847 maxx = L_MAX(maxx, xpt + wpt);
1848 maxy = L_MAX(maxy, ypt + hpt);
1849 }
1850
1851 lpd->mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),
1852 (l_int32)(maxy + 0.5));
1853
1854 /* ypt is in standard image coordinates: the location of
1855 * the UL image corner with respect to the UL media box corner.
1856 * Rewrite each ypt for PostScript coordinates: the location of
1857 * the LL image corner with respect to the LL media box corner. */
1858 for (i = 0; i < lpd->n; i++) {
1859 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1860 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1861 ptaSetPt(lpd->xy, i, xpt, maxy - ypt - hpt);
1862 }
1863}
1864
1865
1866static l_int32
1867generatePageStringPdf(L_PDF_DATA *lpd)
1868{
1869char *buf;
1870char *xstr;
1871l_int32 bufsize, i, wpt, hpt;
1872SARRAY *sa;
1873
1874 /* Allocate 1000 bytes for the boilerplate text, and
1875 * 50 bytes for each reference to an image in the
1876 * ProcSet array. */
1877 bufsize = 1000 + 50 * lpd->n;
1878 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1879 return ERROR_INT("calloc fail for buf", __func__, 1);
1880
1881 boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);
1882 sa = sarrayCreate(lpd->n);
1883 for (i = 0; i < lpd->n; i++) {
1884 snprintf(buf, bufsize, "/Im%d %d 0 R ", i + 1, 6 + i);
1885 sarrayAddString(sa, buf, L_COPY);
1886 }
1887 xstr = sarrayToString(sa, 0);
1888 sarrayDestroy(&sa);
1889 if (!xstr) {
1890 LEPT_FREE(buf);
1891 return ERROR_INT("xstr not made", __func__, 1);
1892 }
1893
1894 snprintf(buf, bufsize, "4 0 obj\n"
1895 "<<\n"
1896 "/Type /Page\n"
1897 "/Parent 3 0 R\n"
1898 "/MediaBox [%d %d %d %d]\n"
1899 "/Contents 5 0 R\n"
1900 "/Resources\n"
1901 "<<\n"
1902 "/XObject << %s >>\n"
1903 "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
1904 ">>\n"
1905 ">>\n"
1906 "endobj\n",
1907 0, 0, wpt, hpt, xstr);
1908
1909 lpd->obj4 = stringNew(buf);
1910 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));
1911 sarrayDestroy(&sa);
1912 LEPT_FREE(buf);
1913 LEPT_FREE(xstr);
1914 return 0;
1915}
1916
1917
1918static l_int32
1919generateContentStringPdf(L_PDF_DATA *lpd)
1920{
1921char *buf;
1922char *cstr;
1923l_int32 i, bufsize;
1924l_float32 xpt, ypt, wpt, hpt;
1925SARRAY *sa;
1926
1927 bufsize = 1000 + 200 * lpd->n;
1928 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1929 return ERROR_INT("calloc fail for buf", __func__, 1);
1930
1931 sa = sarrayCreate(lpd->n);
1932 for (i = 0; i < lpd->n; i++) {
1933 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1934 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1935 snprintf(buf, bufsize,
1936 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
1937 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
1938 sarrayAddString(sa, buf, L_COPY);
1939 }
1940 cstr = sarrayToString(sa, 0);
1941 sarrayDestroy(&sa);
1942 if (!cstr) {
1943 LEPT_FREE(buf);
1944 return ERROR_INT("cstr not made", __func__, 1);
1945 }
1946
1947 snprintf(buf, bufsize, "5 0 obj\n"
1948 "<< /Length %d >>\n"
1949 "stream\n"
1950 "%s"
1951 "endstream\n"
1952 "endobj\n",
1953 (l_int32)strlen(cstr), cstr);
1954
1955 lpd->obj5 = stringNew(buf);
1956 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));
1957 sarrayDestroy(&sa);
1958 LEPT_FREE(buf);
1959 LEPT_FREE(cstr);
1960 return 0;
1961}
1962
1963
1964static l_int32
1965generatePreXStringsPdf(L_PDF_DATA *lpd)
1966{
1967char buff[256];
1968char buf[L_BIGBUF];
1969char *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;
1970l_int32 i, cmindex;
1971L_COMP_DATA *cid;
1972SARRAY *sa;
1973
1974 sa = lpd->saprex;
1975 cmindex = 6 + lpd->n; /* starting value */
1976 for (i = 0; i < lpd->n; i++) {
1977 pstr = cstr = NULL;
1978 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
1979 return ERROR_INT("cid not found", __func__, 1);
1980
1981 if (cid->type == L_G4_ENCODE) {
1982 if (var_WRITE_G4_IMAGE_MASK) {
1983 cstr = stringNew("/ImageMask true\n"
1984 "/ColorSpace /DeviceGray");
1985 } else {
1986 cstr = stringNew("/ColorSpace /DeviceGray");
1987 }
1988 bstr = stringNew("/BitsPerComponent 1\n"
1989 "/Interpolate true");
1990 /* Note: the reversal is deliberate. The BlackIs1 flag
1991 * is misleadingly named: it says whether to invert the
1992 * image on decoding because the black pixels are 0,
1993 * not whether the black pixels are 1! The default for
1994 * BlackIs1 is "false", which means "don't invert because
1995 * black is 1." Yikes. */
1996 photometry = (cid->minisblack) ? stringNew("true")
1997 : stringNew("false");
1998 snprintf(buff, sizeof(buff),
1999 "/Filter /CCITTFaxDecode\n"
2000 "/DecodeParms\n"
2001 "<<\n"
2002 "/BlackIs1 %s\n"
2003 "/K -1\n"
2004 "/Columns %d\n"
2005 ">>", photometry, cid->w);
2006 fstr = stringNew(buff);
2007 LEPT_FREE(photometry);
2008 } else if (cid->type == L_JPEG_ENCODE) {
2009 if (cid->spp == 1)
2010 cstr = stringNew("/ColorSpace /DeviceGray");
2011 else if (cid->spp == 3)
2012 cstr = stringNew("/ColorSpace /DeviceRGB");
2013 else if (cid->spp == 4) /* pdf supports cmyk */
2014 cstr = stringNew("/ColorSpace /DeviceCMYK");
2015 else
2016 L_ERROR("in jpeg: spp != 1, 3 or 4\n", __func__);
2017 bstr = stringNew("/BitsPerComponent 8");
2018 fstr = stringNew("/Filter /DCTDecode");
2019 } else if (cid->type == L_JP2K_ENCODE) {
2020 if (cid->spp == 1)
2021 cstr = stringNew("/ColorSpace /DeviceGray");
2022 else if (cid->spp == 3)
2023 cstr = stringNew("/ColorSpace /DeviceRGB");
2024 else
2025 L_ERROR("in jp2k: spp != 1 && spp != 3\n", __func__);
2026 bstr = stringNew("/BitsPerComponent 8");
2027 fstr = stringNew("/Filter /JPXDecode");
2028 } else { /* type == L_FLATE_ENCODE */
2029 if (cid->ncolors > 0) { /* cmapped */
2030 snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);
2031 cstr = stringNew(buff);
2032 } else {
2033 if (cid->spp == 1 && cid->bps == 1)
2034 cstr = stringNew("/ColorSpace /DeviceGray\n"
2035 "/Decode [1 0]");
2036 else if (cid->spp == 1) /* 8 bpp */
2037 cstr = stringNew("/ColorSpace /DeviceGray");
2038 else if (cid->spp == 3)
2039 cstr = stringNew("/ColorSpace /DeviceRGB");
2040 else
2041 L_ERROR("unknown colorspace: spp = %d\n",
2042 __func__, cid->spp);
2043 }
2044 snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);
2045 bstr = stringNew(buff);
2046 fstr = stringNew("/Filter /FlateDecode");
2047 if (cid->predictor == TRUE) {
2048 snprintf(buff, sizeof(buff),
2049 "/DecodeParms\n"
2050 "<<\n"
2051 " /Columns %d\n"
2052 " /Predictor 14\n"
2053 " /Colors %d\n"
2054 " /BitsPerComponent %d\n"
2055 ">>\n", cid->w, cid->spp, cid->bps);
2056 pstr = stringNew(buff);
2057 }
2058 }
2059 if (!pstr) /* no decode parameters */
2060 pstr = stringNew("");
2061
2062 snprintf(buf, sizeof(buf),
2063 "%d 0 obj\n"
2064 "<<\n"
2065 "/Length %zu\n"
2066 "/Subtype /Image\n"
2067 "%s\n" /* colorspace */
2068 "/Width %d\n"
2069 "/Height %d\n"
2070 "%s\n" /* bits/component */
2071 "%s\n" /* filter */
2072 "%s" /* decode parms; can be empty */
2073 ">>\n"
2074 "stream\n",
2075 6 + i, cid->nbytescomp, cstr,
2076 cid->w, cid->h, bstr, fstr, pstr);
2077 xstr = stringNew(buf);
2078 sarrayAddString(sa, xstr, L_INSERT);
2079 l_dnaAddNumber(lpd->objsize,
2080 strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));
2081 LEPT_FREE(cstr);
2082 LEPT_FREE(bstr);
2083 LEPT_FREE(fstr);
2084 LEPT_FREE(pstr);
2085 }
2086
2087 return 0;
2088}
2089
2090
2091static l_int32
2092generateColormapStringsPdf(L_PDF_DATA *lpd)
2093{
2094char buf[L_BIGBUF];
2095char *cmstr;
2096l_int32 i, cmindex, ncmap;
2097L_COMP_DATA *cid;
2098SARRAY *sa;
2099
2100 /* In our canonical format, we have 5 objects, followed
2101 * by n XObjects, followed by m colormaps, so the index of
2102 * the first colormap object is 6 + n. */
2103 sa = lpd->sacmap;
2104 cmindex = 6 + lpd->n; /* starting value */
2105 ncmap = 0;
2106 for (i = 0; i < lpd->n; i++) {
2107 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
2108 return ERROR_INT("cid not found", __func__, 1);
2109 if (cid->ncolors == 0) continue;
2110
2111 ncmap++;
2112 snprintf(buf, sizeof(buf), "%d 0 obj\n"
2113 "[ /Indexed /DeviceRGB\n"
2114 "%d\n"
2115 "%s\n"
2116 "]\n"
2117 "endobj\n",
2118 cmindex, cid->ncolors - 1, cid->cmapdatahex);
2119 cmindex++;
2120 cmstr = stringNew(buf);
2121 l_dnaAddNumber(lpd->objsize, strlen(cmstr));
2122 sarrayAddString(sa, cmstr, L_INSERT);
2123 }
2124
2125 lpd->ncmap = ncmap;
2126 return 0;
2127}
2128
2129
2130static void
2131generateTrailerPdf(L_PDF_DATA *lpd)
2132{
2133l_int32 i, n, size, linestart;
2134L_DNA *daloc, *dasize;
2135
2136 /* Let nobj be the number of numbered objects. These numbered
2137 * objects are indexed by their pdf number in arrays naloc[]
2138 * and nasize[]. The 0th object is the 9 byte header. Then
2139 * the number of objects in nasize, which includes the header,
2140 * is n = nobj + 1. The array naloc[] has n + 1 elements,
2141 * because it includes as the last element the starting
2142 * location of xref. The indexing of these objects, their
2143 * starting locations and sizes are:
2144 *
2145 * Object number Starting location Size
2146 * ------------- ----------------- --------------
2147 * 0 daloc[0] = 0 dasize[0] = 9
2148 * 1 daloc[1] = 9 dasize[1] = 49
2149 * n daloc[n] dasize[n]
2150 * xref daloc[n+1]
2151 *
2152 * We first generate daloc.
2153 */
2154 dasize = lpd->objsize;
2155 daloc = lpd->objloc;
2156 linestart = 0;
2157 l_dnaAddNumber(daloc, linestart); /* header */
2158 n = l_dnaGetCount(dasize);
2159 for (i = 0; i < n; i++) {
2160 l_dnaGetIValue(dasize, i, &size);
2161 linestart += size;
2162 l_dnaAddNumber(daloc, linestart);
2163 }
2164 l_dnaGetIValue(daloc, n, &lpd->xrefloc); /* save it */
2165
2166 /* Now make the actual trailer string */
2167 lpd->trailer = makeTrailerStringPdf(daloc);
2168}
2169
2170
2171static char *
2172makeTrailerStringPdf(L_DNA *daloc)
2173{
2174char *outstr;
2175char buf[L_BIGBUF];
2176l_int32 i, n, linestart, xrefloc;
2177SARRAY *sa;
2178
2179 if (!daloc)
2180 return (char *)ERROR_PTR("daloc not defined", __func__, NULL);
2181 n = l_dnaGetCount(daloc) - 1; /* numbered objects + 1 (yes, +1) */
2182
2183 sa = sarrayCreate(0);
2184 snprintf(buf, sizeof(buf), "xref\n"
2185 "0 %d\n"
2186 "0000000000 65535 f \n", n);
2187 sarrayAddString(sa, buf, L_COPY);
2188 for (i = 1; i < n; i++) {
2189 l_dnaGetIValue(daloc, i, &linestart);
2190 snprintf(buf, sizeof(buf), "%010d 00000 n \n", linestart);
2191 sarrayAddString(sa, buf, L_COPY);
2192 }
2193
2194 l_dnaGetIValue(daloc, n, &xrefloc);
2195 snprintf(buf, sizeof(buf), "trailer\n"
2196 "<<\n"
2197 "/Size %d\n"
2198 "/Root 1 0 R\n"
2199 "/Info 2 0 R\n"
2200 ">>\n"
2201 "startxref\n"
2202 "%d\n"
2203 "%%%%EOF\n", n, xrefloc);
2204 sarrayAddString(sa, buf, L_COPY);
2205 outstr = sarrayToString(sa, 0);
2206 sarrayDestroy(&sa);
2207 return outstr;
2208}
2209
2210
2224static l_int32
2226 size_t *pnbytes,
2227 L_PDF_DATA *lpd)
2228{
2229char *str;
2230l_uint8 *data;
2231l_int32 nimages, i, len;
2232l_int32 *sizes, *locs;
2233size_t nbytes;
2234L_COMP_DATA *cid;
2235
2236 if (!pdata)
2237 return ERROR_INT("&data not defined", __func__, 1);
2238 *pdata = NULL;
2239 if (!pnbytes)
2240 return ERROR_INT("&nbytes not defined", __func__, 1);
2241 nbytes = lpd->xrefloc + strlen(lpd->trailer);
2242 *pnbytes = nbytes;
2243 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)
2244 return ERROR_INT("calloc fail for data", __func__, 1);
2245 *pdata = data;
2246
2247 sizes = l_dnaGetIArray(lpd->objsize);
2248 locs = l_dnaGetIArray(lpd->objloc);
2249 memcpy(data, lpd->id, sizes[0]);
2250 memcpy(data + locs[1], lpd->obj1, sizes[1]);
2251 memcpy(data + locs[2], lpd->obj2, sizes[2]);
2252 memcpy(data + locs[3], lpd->obj3, sizes[3]);
2253 memcpy(data + locs[4], lpd->obj4, sizes[4]);
2254 memcpy(data + locs[5], lpd->obj5, sizes[5]);
2255
2256 /* Each image has 3 parts: variable preamble, the compressed
2257 * data stream, and the fixed poststream. */
2258 nimages = lpd->n;
2259 for (i = 0; i < nimages; i++) {
2260 if ((cid = pdfdataGetCid(lpd, i)) == NULL) { /* should not happen */
2261 LEPT_FREE(sizes);
2262 LEPT_FREE(locs);
2263 return ERROR_INT("cid not found", __func__, 1);
2264 }
2265 str = sarrayGetString(lpd->saprex, i, L_NOCOPY);
2266 len = strlen(str);
2267 memcpy(data + locs[6 + i], str, len);
2268 memcpy(data + locs[6 + i] + len,
2269 cid->datacomp, cid->nbytescomp);
2270 memcpy(data + locs[6 + i] + len + cid->nbytescomp,
2271 lpd->poststream, strlen(lpd->poststream));
2272 }
2273
2274 /* Each colormap is simply a stored string */
2275 for (i = 0; i < lpd->ncmap; i++) {
2276 str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);
2277 memcpy(data + locs[6 + nimages + i], str, strlen(str));
2278 }
2279
2280 /* And finally the trailer */
2281 memcpy(data + lpd->xrefloc, lpd->trailer, strlen(lpd->trailer));
2282 LEPT_FREE(sizes);
2283 LEPT_FREE(locs);
2284 return 0;
2285}
2286
2287
2288/*---------------------------------------------------------------------*
2289 * Helper functions for generating multipage pdf output *
2290 *---------------------------------------------------------------------*/
2298static l_int32
2300 L_DNA **pda)
2301{
2302char *str;
2303l_uint8 nl = '\n';
2304l_uint8 *data;
2305l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
2306size_t size;
2307L_DNA *da, *daobj, *daxref;
2308SARRAY *sa;
2309
2310 if (!pda)
2311 return ERROR_INT("&da not defined", __func__, 1);
2312 *pda = NULL;
2313 if (!bas)
2314 return ERROR_INT("bas not defined", __func__, 1);
2315 data = l_byteaGetData(bas, &size);
2316 if (memcmp(data, "%PDF-1.", 7) != 0)
2317 return ERROR_INT("PDF header signature not found", __func__, 1);
2318
2319 /* Search for "startxref" starting 50 bytes from the EOF */
2320 start = 0;
2321 if (size > 50)
2322 start = size - 50;
2323 arrayFindSequence(data + start, size - start,
2324 (l_uint8 *)"startxref\n", 10, &loc, &found);
2325 if (!found)
2326 return ERROR_INT("startxref not found!", __func__, 1);
2327 if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)
2328 return ERROR_INT("xrefloc not found!", __func__, 1);
2329 if (xrefloc < 0 || xrefloc >= size)
2330 return ERROR_INT("invalid xrefloc!", __func__, 1);
2331 sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);
2332 str = sarrayGetString(sa, 1, L_NOCOPY);
2333 if ((sscanf(str, "0 %d", &nobj)) != 1) {
2334 sarrayDestroy(&sa);
2335 return ERROR_INT("nobj not found", __func__, 1);
2336 }
2337
2338 /* Get starting locations. The numa index is the
2339 * object number. loc[0] is the ID; loc[nobj + 1] is xrefloc. */
2340 da = l_dnaCreate(nobj + 1);
2341 *pda = da;
2342 for (i = 0; i < nobj; i++) {
2343 str = sarrayGetString(sa, i + 2, L_NOCOPY);
2344 sscanf(str, "%d", &startloc);
2345 l_dnaAddNumber(da, startloc);
2346 }
2347 l_dnaAddNumber(da, xrefloc);
2348
2349#if DEBUG_MULTIPAGE
2350 lept_stderr("************** Trailer string ************\n");
2351 lept_stderr("xrefloc = %d", xrefloc);
2352 sarrayWriteStderr(sa);
2353
2354 lept_stderr("************** Object locations ************");
2355 l_dnaWriteStderr(da);
2356#endif /* DEBUG_MULTIPAGE */
2357 sarrayDestroy(&sa);
2358
2359 /* Verify correct parsing */
2360 trailer_ok = TRUE;
2361 for (i = 1; i < nobj; i++) {
2362 l_dnaGetIValue(da, i, &startloc);
2363 if ((sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) {
2364 L_ERROR("bad trailer for object %d\n", __func__, i);
2365 trailer_ok = FALSE;
2366 break;
2367 }
2368 }
2369
2370 /* If the trailer is broken, reconstruct the correct obj locations */
2371 if (!trailer_ok) {
2372 L_INFO("rebuilding pdf trailer\n", __func__);
2373 l_dnaEmpty(da);
2374 l_dnaAddNumber(da, 0);
2375 l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);
2376 nobj = l_dnaGetCount(daobj);
2377 for (i = 0; i < nobj; i++) {
2378 l_dnaGetIValue(daobj, i, &loc);
2379 for (j = loc - 1; j > 0; j--) {
2380 if (data[j] == nl)
2381 break;
2382 }
2383 l_dnaAddNumber(da, j + 1);
2384 }
2385 l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);
2386 l_dnaGetIValue(daxref, 0, &loc);
2387 l_dnaAddNumber(da, loc);
2388 l_dnaDestroy(&daobj);
2389 l_dnaDestroy(&daxref);
2390 }
2391
2392 return 0;
2393}
2394
2395
2396static char *
2397generatePagesObjStringPdf(NUMA *napage)
2398{
2399char *str;
2400char *buf;
2401l_int32 i, n, index, bufsize;
2402SARRAY *sa;
2403
2404 if (!napage)
2405 return (char *)ERROR_PTR("napage not defined", __func__, NULL);
2406
2407 n = numaGetCount(napage);
2408 bufsize = 100 + 16 * n; /* large enough to hold the output string */
2409 buf = (char *)LEPT_CALLOC(bufsize, sizeof(char));
2410 sa = sarrayCreate(n);
2411 for (i = 0; i < n; i++) {
2412 numaGetIValue(napage, i, &index);
2413 snprintf(buf, bufsize, " %d 0 R ", index);
2414 sarrayAddString(sa, buf, L_COPY);
2415 }
2416
2417 str = sarrayToString(sa, 0);
2418 snprintf(buf, bufsize - 1, "3 0 obj\n"
2419 "<<\n"
2420 "/Type /Pages\n"
2421 "/Kids [%s]\n"
2422 "/Count %d\n"
2423 ">>\n"
2424 "endobj\n",
2425 str, n);
2426 sarrayDestroy(&sa);
2427 LEPT_FREE(str);
2428 return buf;
2429}
2430
2431
2449static L_BYTEA *
2451 NUMA *na_objs)
2452{
2453l_uint8 space = ' ';
2454l_uint8 *datas;
2455l_uint8 buf[32]; /* only needs to hold one integer in ascii format */
2456l_int32 start, nrepl, i, j, nobjs, objin, objout, found;
2457l_int32 *objs, *matches;
2458size_t size;
2459L_BYTEA *bad;
2460L_DNA *da_match;
2461
2462 if (!bas)
2463 return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
2464 if (!na_objs)
2465 return (L_BYTEA *)ERROR_PTR("na_objs not defined", __func__, NULL);
2466
2467 datas = l_byteaGetData(bas, &size);
2468 bad = l_byteaCreate(100);
2469 objs = numaGetIArray(na_objs); /* object number mapper */
2470 nobjs = numaGetCount(na_objs); /* use for sanity checking */
2471
2472 /* Substitute the object number on the first line */
2473 sscanf((char *)datas, "%d", &objin);
2474 if (objin < 0 || objin >= nobjs) {
2475 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2476 LEPT_FREE(objs);
2477 return bad;
2478 }
2479 objout = objs[objin];
2480 snprintf((char *)buf, 32, "%d", objout);
2481 l_byteaAppendString(bad, (char *)buf);
2482
2483 /* Find the set of matching locations for object references */
2484 arrayFindSequence(datas, size, &space, 1, &start, &found);
2485 da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);
2486 if (!da_match) {
2487 l_byteaAppendData(bad, datas + start, size - start);
2488 LEPT_FREE(objs);
2489 return bad;
2490 }
2491
2492 /* Substitute all the object reference numbers */
2493 nrepl = l_dnaGetCount(da_match);
2494 matches = l_dnaGetIArray(da_match);
2495 for (i = 0; i < nrepl; i++) {
2496 /* Find the first space before the object number */
2497 for (j = matches[i] - 1; j > 0; j--) {
2498 if (datas[j] == space)
2499 break;
2500 }
2501 /* Copy bytes from 'start' up to the object number */
2502 l_byteaAppendData(bad, datas + start, j - start + 1);
2503 sscanf((char *)(datas + j + 1), "%d", &objin);
2504 if (objin < 0 || objin >= nobjs) {
2505 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2506 LEPT_FREE(objs);
2507 LEPT_FREE(matches);
2508 l_dnaDestroy(&da_match);
2509 return bad;
2510 }
2511 objout = objs[objin];
2512 snprintf((char *)buf, 32, "%d", objout);
2513 l_byteaAppendString(bad, (char *)buf);
2514 start = matches[i];
2515 }
2516 l_byteaAppendData(bad, datas + start, size - start);
2517
2518 LEPT_FREE(objs);
2519 LEPT_FREE(matches);
2520 l_dnaDestroy(&da_match);
2521 return bad;
2522}
2523
2524
2525/*---------------------------------------------------------------------*
2526 * Create/destroy/access pdf data *
2527 *---------------------------------------------------------------------*/
2528static L_PDF_DATA *
2529pdfdataCreate(const char *title)
2530{
2531L_PDF_DATA *lpd;
2532
2533 lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));
2534 if (title) lpd->title = stringNew(title);
2535 lpd->cida = ptraCreate(10);
2536 lpd->xy = ptaCreate(10);
2537 lpd->wh = ptaCreate(10);
2538 lpd->saprex = sarrayCreate(10);
2539 lpd->sacmap = sarrayCreate(10);
2540 lpd->objsize = l_dnaCreate(20);
2541 lpd->objloc = l_dnaCreate(20);
2542 return lpd;
2543}
2544
2545static void
2546pdfdataDestroy(L_PDF_DATA **plpd)
2547{
2548l_int32 i;
2549L_COMP_DATA *cid;
2550L_PDF_DATA *lpd;
2551
2552 if (plpd== NULL) {
2553 L_WARNING("ptr address is null!\n", __func__);
2554 return;
2555 }
2556 if ((lpd = *plpd) == NULL)
2557 return;
2558
2559 if (lpd->title) LEPT_FREE(lpd->title);
2560 for (i = 0; i < lpd->n; i++) {
2561 cid = (L_COMP_DATA *)ptraRemove(lpd->cida, i, L_NO_COMPACTION);
2562 l_CIDataDestroy(&cid);
2563 }
2564
2565 ptraDestroy(&lpd->cida, 0, 0);
2566 if (lpd->id) LEPT_FREE(lpd->id);
2567 if (lpd->obj1) LEPT_FREE(lpd->obj1);
2568 if (lpd->obj2) LEPT_FREE(lpd->obj2);
2569 if (lpd->obj3) LEPT_FREE(lpd->obj3);
2570 if (lpd->obj4) LEPT_FREE(lpd->obj4);
2571 if (lpd->obj5) LEPT_FREE(lpd->obj5);
2572 if (lpd->poststream) LEPT_FREE(lpd->poststream);
2573 if (lpd->trailer) LEPT_FREE(lpd->trailer);
2574 if (lpd->xy) ptaDestroy(&lpd->xy);
2575 if (lpd->wh) ptaDestroy(&lpd->wh);
2576 if (lpd->mediabox) boxDestroy(&lpd->mediabox);
2577 if (lpd->saprex) sarrayDestroy(&lpd->saprex);
2578 if (lpd->sacmap) sarrayDestroy(&lpd->sacmap);
2579 if (lpd->objsize) l_dnaDestroy(&lpd->objsize);
2580 if (lpd->objloc) l_dnaDestroy(&lpd->objloc);
2581 LEPT_FREE(lpd);
2582 *plpd = NULL;
2583}
2584
2585
2586static L_COMP_DATA *
2587pdfdataGetCid(L_PDF_DATA *lpd,
2588 l_int32 index)
2589{
2590 if (!lpd)
2591 return (L_COMP_DATA *)ERROR_PTR("lpd not defined", __func__, NULL);
2592 if (index < 0 || index >= lpd->n)
2593 return (L_COMP_DATA *)ERROR_PTR("invalid image index", __func__, NULL);
2594
2595 return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index);
2596}
2597
2598
2599/*---------------------------------------------------------------------*
2600 * Find number of pages in a pdf *
2601 *---------------------------------------------------------------------*/
2620l_ok
2621getPdfPageCount(const char *fname,
2622 l_int32 *pnpages)
2623{
2624l_uint8 *data;
2625l_int32 format, loc, ret, npages, found;
2626size_t nread;
2627
2628 if (!pnpages)
2629 return ERROR_INT("&npages not defined", __func__, 1);
2630 *pnpages = 0;
2631 if (!fname)
2632 return ERROR_INT("fname not defined", __func__, 1);
2633
2634 /* Make sure this a pdf file */
2635 findFileFormat(fname, &format);
2636 if (format != IFF_LPDF)
2637 return ERROR_INT("file is not pdf", __func__, 1);
2638
2639 /* Read 10000 bytes from the beginning of the file */
2640 if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))
2641 == NULL)
2642 return ERROR_INT("partial data not read", __func__, 1);
2643
2644 /* Find the location of the first instance of "/Count".
2645 * If it is not found, try reading the entire file and
2646 * looking again. */
2647 arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
2648 strlen("/Count"), &loc, &found);
2649 if (!found) {
2650 lept_stderr("Reading entire file looking for '/Count'\n");
2651 LEPT_FREE(data);
2652 if ((data = l_binaryRead(fname, &nread)) == NULL)
2653 return ERROR_INT("full data not read", __func__, 1);
2654 arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
2655 strlen("/Count"), &loc, &found);
2656 if (!found) {
2657 LEPT_FREE(data);
2658 L_WARNING("/Count not found\n", __func__);
2659 return 0;
2660 }
2661 }
2662
2663 /* Unlikely: make sure we can read the count field */
2664 if (nread - loc < 12) { /* haven't read enough to capture page count */
2665 LEPT_FREE(data);
2666 return ERROR_INT("data may not include page count field", __func__, 1);
2667 }
2668
2669 /* Read the page count; if not found, puts garbage in npages */
2670 ret = sscanf((char *)&data[loc], "/Count %d", &npages);
2671 LEPT_FREE(data);
2672 if (ret != 1)
2673 return ERROR_INT("npages not found", __func__, 1);
2674 *pnpages = npages;
2675/* lept_stderr("bytes read = %d, loc = %d, npages = %d\n",
2676 nread, loc, *pnpages); */
2677 return 0;
2678}
2679
2680
2681/*---------------------------------------------------------------------*
2682 * Find widths and heights of pages and media boxes in a pdf *
2683 *---------------------------------------------------------------------*/
2704l_ok
2705getPdfPageSizes(const char *fname,
2706 NUMA **pnaw,
2707 NUMA **pnah,
2708 l_int32 *pmedw,
2709 l_int32 *pmedh)
2710{
2711l_uint8 *data;
2712l_int32 i, nw, nh, format, ret, loc, width, height;
2713l_float32 fval;
2714size_t nread;
2715L_DNA *dnaw; /* width locations */
2716L_DNA *dnah; /* height locations */
2717NUMA *naw; /* widths */
2718NUMA *nah; /* heights */
2719
2720 if (pnaw) *pnaw = NULL;
2721 if (pnah) *pnah = NULL;
2722 if (pmedw) *pmedw = 0;
2723 if (pmedh) *pmedh = 0;
2724 if (!pnaw && !pnah && !pmedw && !pmedh)
2725 return ERROR_INT("no output requested", __func__, 1);
2726 if (!fname)
2727 return ERROR_INT("fname not defined", __func__, 1);
2728
2729 /* Make sure this a pdf file */
2730 findFileFormat(fname, &format);
2731 if (format != IFF_LPDF)
2732 return ERROR_INT("file is not pdf", __func__, 1);
2733
2734 /* Read the file into memory and find all locations of
2735 * '/Width' and '/Height' */
2736 if ((data = l_binaryRead(fname, &nread)) == NULL)
2737 return ERROR_INT("full data not read", __func__, 1);
2738 dnaw = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Width",
2739 strlen("/Width"));
2740 dnah = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Height",
2741 strlen("/Height"));
2742 if (!dnaw)
2743 L_WARNING("unable to find widths\n", __func__);
2744 if (!dnah)
2745 L_WARNING("unable to find heights\n", __func__);
2746 if (!dnaw && !dnah) {
2747 LEPT_FREE(data);
2748 L_WARNING("no fields found\n", __func__);
2749 return 0;
2750 }
2751
2752 /* Find the page widths and heights */
2753 nw = l_dnaGetCount(dnaw);
2754 naw = numaCreate(nw);
2755 for (i = 0; i < nw; i++) {
2756 l_dnaGetIValue(dnaw, i, &loc);
2757 ret = sscanf((char *)&data[loc], "/Width %d", &width);
2758 if (ret != 1) {
2759 L_ERROR("width not found for item %d at loc %d\n",
2760 __func__, i, loc);
2761 continue;
2762 }
2763 numaAddNumber(naw, width);
2764 }
2765 nh = l_dnaGetCount(dnah);
2766 nah = numaCreate(nh);
2767 for (i = 0; i < nh; i++) {
2768 l_dnaGetIValue(dnah, i, &loc);
2769 ret = sscanf((char *)&data[loc], "/Height %d", &height);
2770 if (ret != 1) {
2771 L_ERROR("height not found for item %d at loc %d\n",
2772 __func__, i, loc);
2773 continue;
2774 }
2775 numaAddNumber(nah, height);
2776 }
2777
2778 LEPT_FREE(data);
2779 l_dnaDestroy(&dnaw);
2780 l_dnaDestroy(&dnah);
2781 if (pmedw) {
2782 numaGetMedian(naw, &fval);
2783 *pmedw = lept_roundftoi(fval);
2784 }
2785 if (pnaw)
2786 *pnaw = naw;
2787 else
2788 numaDestroy(&naw);
2789 if (pmedh) {
2790 numaGetMedian(nah, &fval);
2791 *pmedh = lept_roundftoi(fval);
2792 }
2793 if (pnah)
2794 *pnah = nah;
2795 else
2796 numaDestroy(&nah);
2797 return 0;
2798}
2799
2800
2827l_ok
2828getPdfMediaBoxSizes(const char *fname,
2829 NUMA **pnaw,
2830 NUMA **pnah,
2831 l_int32 *pmedw,
2832 l_int32 *pmedh)
2833{
2834l_uint8 *data;
2835l_int32 i, n, format, ret, loc;
2836l_float32 fval, ignore1, ignore2, w, h;
2837size_t nread;
2838L_DNA *dna; /* mediabox locations */
2839NUMA *naw; /* mediabox widths */
2840NUMA *nah; /* mediabox heights */
2841
2842 if (pnaw) *pnaw = NULL;
2843 if (pnah) *pnah = NULL;
2844 if (pmedw) *pmedw = 0;
2845 if (pmedh) *pmedh = 0;
2846 if (!pnaw && !pnah && !pmedw && !pmedh)
2847 return ERROR_INT("no output requested", __func__, 1);
2848 if (!fname)
2849 return ERROR_INT("fname not defined", __func__, 1);
2850
2851 /* Make sure this a pdf file */
2852 findFileFormat(fname, &format);
2853 if (format != IFF_LPDF)
2854 return ERROR_INT("file is not pdf", __func__, 1);
2855
2856 /* Read the file into memory and find all locations of '/MediaBox' */
2857 if ((data = l_binaryRead(fname, &nread)) == NULL)
2858 return ERROR_INT("full data not read", __func__, 1);
2859 dna = arrayFindEachSequence(data, nread, (const l_uint8 *)"/MediaBox",
2860 strlen("/MediaBox"));
2861 if (!dna) {
2862 LEPT_FREE(data);
2863 L_WARNING("no mediaboxes found\n", __func__);
2864 return 1;
2865 }
2866
2867 /* Find the mediabox widths and heights */
2868 n = l_dnaGetCount(dna);
2869 naw = numaCreate(n);
2870 nah = numaCreate(n);
2871 for (i = 0; i < n; i++) {
2872 l_dnaGetIValue(dna, i, &loc);
2873 ret = sscanf((char *)&data[loc], "/MediaBox [ %f %f %f %f",
2874 &ignore1, &ignore2, &w, &h);
2875 if (ret != 4) {
2876 L_ERROR("mediabox sizes not found for item %d at loc %d\n",
2877 __func__, i, loc);
2878 continue;
2879 }
2880 numaAddNumber(naw, w);
2881 numaAddNumber(nah, h);
2882 }
2883 LEPT_FREE(data);
2884 l_dnaDestroy(&dna);
2885
2886 if (pmedw) {
2887 numaGetMedian(naw, &fval);
2888 *pmedw = lept_roundftoi(fval);
2889 if (*pmedw > 850) lept_stderr("oversize width: %d\n", *pmedw);
2890 }
2891 if (pnaw)
2892 *pnaw = naw;
2893 else
2894 numaDestroy(&naw);
2895 if (pmedh) {
2896 numaGetMedian(nah, &fval);
2897 *pmedh = lept_roundftoi(fval);
2898 if (*pmedh > 850) lept_stderr("oversize height: %d\n", *pmedh);
2899 }
2900 if (pnah)
2901 *pnah = nah;
2902 else
2903 numaDestroy(&nah);
2904 return 0;
2905}
2906
2907
2908/*---------------------------------------------------------------------*
2909 * Find effective resolution of images rendered from a pdf *
2910 *---------------------------------------------------------------------*/
2939l_ok
2940getPdfRendererResolution(const char *infile,
2941 const char *outdir,
2942 l_int32 *pres)
2943{
2944char buf[256];
2945char *tail, *basename, *fname;
2946l_int32 ret, res, medw, medh, medmax, npages, pageno, w, h;
2947SARRAY *sa;
2948
2949 if (!pres)
2950 return ERROR_INT("&res not defined", __func__, 1);
2951 *pres = 300; /* default */
2952
2953#ifdef _WIN32
2954 L_INFO("Requires pdftoppm, so this is disabled on windows.\n"
2955 "Returns default resolution 300 ppi", __func__);
2956 return 0;
2957#endif /* _WIN32 */
2958
2959 if (!LeptDebugOK) {
2960 L_INFO("Running pdftoppm is disabled; "
2961 "use setLeptDebugOK(1) to enable\n"
2962 "returns default resolution 300 ppi\n", __func__);
2963 return 1;
2964 }
2965
2966 if (!infile)
2967 return ERROR_INT("infile not defined", __func__, 1);
2968 if (!outdir)
2969 return ERROR_INT("outdir not defined", __func__, 1);
2970
2971 res = 300; /* default value */
2972 ret = getPdfMediaBoxSizes(infile, NULL, NULL, &medw, &medh);
2973 if (ret == 0) { /* Check for oversize mediaboxes */
2974 lept_stderr("Media Box medians: medw = %d, medh = %d\n", medw, medh);
2975 medmax = L_MAX(medw, medh);
2976 if (medmax > 850) {
2977 res = 300 * ((l_float32)792 / (l_float32)medmax);
2978 lept_stderr(" Oversize media box; use resolution = %d\n", res);
2979 *pres = res;
2980 }
2981 return 0;
2982 }
2983
2984 /* No mediaboxes; render one page and measure the max dimension */
2985 lept_stderr("Media Box dimensions not found\n");
2986 getPdfPageCount(infile, &npages);
2987 pageno = (npages > 0) ? (npages + 1) / 2 : 1;
2988 splitPathAtDirectory(infile, NULL, &tail);
2989 splitPathAtExtension(tail, &basename, NULL);
2990 snprintf(buf, sizeof(buf), "pdftoppm -f %d -l %d -r 72 %s %s/%s",
2991 pageno, pageno, infile, outdir, basename);
2992 LEPT_FREE(tail);
2993 LEPT_FREE(basename);
2994 callSystemDebug(buf); /* pdftoppm */
2995
2996 /* Get the page size */
2997 sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);
2998 fname = sarrayGetString(sa, 0, L_NOCOPY);
2999 pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);
3000 sarrayDestroy(&sa);
3001 if (w > 0 && h > 0) {
3002 res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);
3003 *pres = res;
3004 lept_stderr("Use resolution = %d\n", res);
3005 } else {
3006 L_ERROR("page size not found; assuming res = 300\n", __func__);
3007 }
3008
3009 return 0;
3010}
3011
3012
3013/*---------------------------------------------------------------------*
3014 * Set flags for special modes *
3015 *---------------------------------------------------------------------*/
3030void
3032{
3033 var_WRITE_G4_IMAGE_MASK = flag;
3034}
3035
3036
3050void
3052{
3053 var_WRITE_DATE_AND_VERSION = flag;
3054}
3055
3056/* --------------------------------------------*/
3057#endif /* USE_PDFIO */
3058/* --------------------------------------------*/
@ L_FLATE_ENCODE
Definition imageio.h:161
@ L_G4_ENCODE
Definition imageio.h:160
@ L_JP2K_ENCODE
Definition imageio.h:162
@ L_JPEG_ENCODE
Definition imageio.h:159
@ L_FIRST_IMAGE
Definition imageio.h:208
@ L_LAST_IMAGE
Definition imageio.h:210
void l_CIDataDestroy(L_COMP_DATA **pcid)
l_CIDataDestroy()
Definition pdfio2.c:1656
L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
l_generateJpegDataMem()
Definition pdfio2.c:1002
l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
pixGenerateCIData()
Definition pdfio2.c:1206
l_ok getPdfRendererResolution(const char *infile, const char *outdir, l_int32 *pres)
getPdfRendererResolution()
Definition pdfio2.c:2940
static L_COMP_DATA * pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag)
pixGenerateFlateData()
Definition pdfio2.c:1343
L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag)
l_generateFlateData()
Definition pdfio2.c:1308
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
Definition pdfio2.c:201
L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag)
l_generateJpegData()
Definition pdfio2.c:925
static char * generateEscapeString(const char *str)
generateEscapeString()
Definition pdfio2.c:1809
L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag)
l_generateG4Data()
Definition pdfio2.c:1114
static L_COMP_DATA * pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, l_int32 quality)
pixGenerateJpegData()
Definition pdfio2.c:1471
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
Definition pdfio2.c:3051
static L_BYTEA * substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs)
substituteObjectNumbers()
Definition pdfio2.c:2450
l_ok getPdfMediaBoxSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfMediaBoxSizes()
Definition pdfio2.c:2828
l_ok getPdfPageCount(const char *fname, l_int32 *pnpages)
getPdfPageCount()
Definition pdfio2.c:2621
l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout)
convertTiffMultipageToPdf()
Definition pdfio2.c:491
l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
l_generateCIData()
Definition pdfio2.c:625
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
Definition pdfio2.c:329
static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda)
parseTrailerPdf()
Definition pdfio2.c:2299
static L_COMP_DATA * pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag)
pixGenerateG4Data()
Definition pdfio2.c:1565
l_ok getPdfPageSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfPageSizes()
Definition pdfio2.c:2705
void l_pdfSetG4ImageMask(l_int32 flag)
l_pdfSetG4ImageMask()
Definition pdfio2.c:3031
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
Definition pdfio2.c:1609
static L_COMP_DATA * l_generateJp2kData(const char *fname)
l_generateJp2kData()
Definition pdfio2.c:1062
L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pixs)
l_generateFlateDataPdf()
Definition pdfio2.c:727
static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
l_generatePdf()
Definition pdfio2.c:1701
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
Definition pdfio2.c:543
static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
generateOutputDataPdf()
Definition pdfio2.c:2225
static L_COMP_DATA * pixGenerateJp2kData(PIX *pixs, l_int32 quality)
pixGenerateJp2kData()
Definition pdfio2.c:1519
@ L_COPY
Definition pix.h:505
@ L_CLONE
Definition pix.h:506
@ L_NOCOPY
Definition pix.h:503
@ L_INSERT
Definition pix.h:504
@ L_NO_COMPACTION
Definition ptra.h:79
l_int32 ncolors
Definition imageio.h:190
l_int32 predictor
Definition imageio.h:196
char * cmapdatahex
Definition imageio.h:189
l_uint8 * datacomp
Definition imageio.h:184
size_t nbytescomp
Definition imageio.h:185
l_int32 minisblack
Definition imageio.h:195
char * cmapdata85
Definition imageio.h:188
l_int32 xrefloc
Definition imageio.h:246
char * poststream
Definition imageio.h:237
struct Sarray * saprex
Definition imageio.h:242
struct L_Ptra * cida
Definition imageio.h:230
struct Pta * xy
Definition imageio.h:239
l_int32 ncmap
Definition imageio.h:229
char * obj2
Definition imageio.h:233
char * trailer
Definition imageio.h:238
char * obj1
Definition imageio.h:232
struct Sarray * sacmap
Definition imageio.h:243
l_int32 n
Definition imageio.h:228
struct L_Dna * objsize
Definition imageio.h:244
struct L_Dna * objloc
Definition imageio.h:245
char * title
Definition imageio.h:227
char * id
Definition imageio.h:231
struct Pta * wh
Definition imageio.h:240
char * obj5
Definition imageio.h:236
char * obj4
Definition imageio.h:235
struct Box * mediabox
Definition imageio.h:241
char * obj3
Definition imageio.h:234
Definition ptra.h:54