Leptonica 1.84.1
Image processing and image analysis suite
Loading...
Searching...
No Matches
pdfio2.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
105#ifdef HAVE_CONFIG_H
106#include <config_auto.h>
107#endif /* HAVE_CONFIG_H */
108
109#include <string.h>
110#include <math.h>
111#include "allheaders.h"
112
113/* --------------------------------------------*/
114#if USE_PDFIO /* defined in environ.h */
115 /* --------------------------------------------*/
116
117 /* Typical scan resolution in ppi (pixels/inch) */
118static const l_int32 DefaultInputRes = 300;
119
120 /* Static helpers */
121static L_COMP_DATA *l_generateJp2kData(const char *fname);
122static L_COMP_DATA *pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag);
123static L_COMP_DATA *pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag,
124 l_int32 quality);
125static L_COMP_DATA *pixGenerateJp2kData(PIX *pixs, l_int32 quality);
126static L_COMP_DATA *pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag);
127
128static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes,
129 L_PDF_DATA *lpd);
130static void generateFixedStringsPdf(L_PDF_DATA *lpd);
131static char *generateEscapeString(const char *str);
132static void generateMediaboxPdf(L_PDF_DATA *lpd);
133static l_int32 generatePageStringPdf(L_PDF_DATA *lpd);
134static l_int32 generateContentStringPdf(L_PDF_DATA *lpd);
135static l_int32 generatePreXStringsPdf(L_PDF_DATA *lpd);
136static l_int32 generateColormapStringsPdf(L_PDF_DATA *lpd);
137static void generateTrailerPdf(L_PDF_DATA *lpd);
138static char *makeTrailerStringPdf(L_DNA *daloc);
139static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes,
140 L_PDF_DATA *lpd);
141
142static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda);
143static char *generatePagesObjStringPdf(NUMA *napage);
144static L_BYTEA *substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs);
145
146static L_PDF_DATA *pdfdataCreate(const char *title);
147static void pdfdataDestroy(L_PDF_DATA **plpd);
148static L_COMP_DATA *pdfdataGetCid(L_PDF_DATA *lpd, l_int32 index);
149
150
151/* ---------------- Defaults for rendering options ----------------- */
152 /* Output G4 as writing through image mask; this is the default */
153static l_int32 var_WRITE_G4_IMAGE_MASK = 1;
154 /* Write date/time and lib version into pdf; this is the default */
155static l_int32 var_WRITE_DATE_AND_VERSION = 1;
156
157#define L_SMALLBUF 256
158#define L_BIGBUF 2048 /* must be able to hold hex colormap */
159
160
161#ifndef NO_CONSOLE_IO
162#define DEBUG_MULTIPAGE 0
163#endif /* ~NO_CONSOLE_IO */
164
165
/*---------------------------------------------------------------------*
 *    Intermediate function for single page, multi-image conversion    *
 *---------------------------------------------------------------------*/
200l_ok
202 l_int32 type,
203 l_int32 quality,
204 l_uint8 **pdata,
205 size_t *pnbytes,
206 l_int32 x,
207 l_int32 y,
208 l_int32 res,
209 const char *title,
210 L_PDF_DATA **plpd,
211 l_int32 position)
212{
213l_int32 pixres, w, h, ret;
214l_float32 xpt, ypt, wpt, hpt;
215L_COMP_DATA *cid = NULL;
216L_PDF_DATA *lpd = NULL;
217
218 if (!pdata)
219 return ERROR_INT("&data not defined", __func__, 1);
220 *pdata = NULL;
221 if (!pnbytes)
222 return ERROR_INT("&nbytes not defined", __func__, 1);
223 *pnbytes = 0;
224 if (!pix)
225 return ERROR_INT("pix not defined", __func__, 1);
226 if (type != L_JPEG_ENCODE && type != L_G4_ENCODE &&
227 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
228 selectDefaultPdfEncoding(pix, &type);
229 }
230 if (quality < 0 || quality > 100)
231 return ERROR_INT("invalid quality", __func__, 1);
232
233 if (plpd) { /* part of multi-page invocation */
234 if (position == L_FIRST_IMAGE)
235 *plpd = NULL;
236 }
237
238 /* Generate the compressed image data. It must NOT
239 * be ascii85 encoded. */
240 pixGenerateCIData(pix, type, quality, 0, &cid);
241 if (!cid)
242 return ERROR_INT("cid not made", __func__, 1);
243
244 /* Get media box in pts. Guess the input image resolution
245 * based on the input parameter %res, the resolution data in
246 * the pix, and the size of the image. */
247 pixres = cid->res;
248 w = cid->w;
249 h = cid->h;
250 if (res <= 0.0)
251 res = (pixres > 0) ? pixres : DefaultInputRes;
252 xpt = x * 72.f / res;
253 ypt = y * 72.f / res;
254 wpt = w * 72.f / res;
255 hpt = h * 72.f / res;
256
257 /* Set up lpd */
258 if (!plpd) { /* single image */
259 if ((lpd = pdfdataCreate(title)) == NULL)
260 return ERROR_INT("lpd not made", __func__, 1);
261 } else if (position == L_FIRST_IMAGE) { /* first of multiple images */
262 if ((lpd = pdfdataCreate(title)) == NULL)
263 return ERROR_INT("lpd not made", __func__, 1);
264 *plpd = lpd;
265 } else { /* not the first of multiple images */
266 lpd = *plpd;
267 }
268
269 /* Add the data to the lpd */
270 ptraAdd(lpd->cida, cid);
271 lpd->n++;
272 ptaAddPt(lpd->xy, xpt, ypt);
273 ptaAddPt(lpd->wh, wpt, hpt);
274
275 /* If a single image or the last of multiple images,
276 * generate the pdf and destroy the lpd */
277 if (!plpd || (position == L_LAST_IMAGE)) {
278 ret = l_generatePdf(pdata, pnbytes, lpd);
279 pdfdataDestroy(&lpd);
280 if (plpd) *plpd = NULL;
281 if (ret)
282 return ERROR_INT("pdf output not made", __func__, 1);
283 }
284
285 return 0;
286}
287
288
289/*---------------------------------------------------------------------*
290 * Intermediate function for generating multipage pdf output *
291 *---------------------------------------------------------------------*/
328l_ok
330 SARRAY *sa,
331 l_uint8 **pdata,
332 size_t *pnbytes)
333{
334char *fname, *str_pages, *str_trailer;
335l_uint8 *pdfdata, *data;
336l_int32 i, j, index, nobj, npages;
337l_int32 *sizes, *locs;
338size_t size;
339L_BYTEA *bas, *bad, *bat1, *bat2;
340L_DNA *da_locs, *da_sizes, *da_outlocs, *da;
341L_DNAA *daa_locs; /* object locations on each page */
342NUMA *na_objs, *napage;
343NUMAA *naa_objs; /* object mapping numbers to new values */
344
345 if (!pdata)
346 return ERROR_INT("&data not defined", __func__, 1);
347 *pdata = NULL;
348 if (!pnbytes)
349 return ERROR_INT("&nbytes not defined", __func__, 1);
350 *pnbytes = 0;
351 if (!pa_data)
352 return ERROR_INT("pa_data not defined", __func__, 1);
353
354 /* Parse the files and find the object locations.
355 * Remove file data that cannot be parsed. */
356 ptraGetActualCount(pa_data, &npages);
357 daa_locs = l_dnaaCreate(npages);
358 for (i = 0; i < npages; i++) {
359 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
360 if (parseTrailerPdf(bas, &da_locs) != 0) {
361 bas = (L_BYTEA *)ptraRemove(pa_data, i, L_NO_COMPACTION);
362 l_byteaDestroy(&bas);
363 if (sa) {
364 fname = sarrayGetString(sa, i, L_NOCOPY);
365 L_ERROR("can't parse file %s; skipping\n", __func__, fname);
366 } else {
367 L_ERROR("can't parse file %d; skipping\n", __func__, i);
368 }
369 } else {
370 l_dnaaAddDna(daa_locs, da_locs, L_INSERT);
371 }
372 }
373
374 /* Recompute npages in case some of the files were not pdf */
375 ptraCompactArray(pa_data);
376 ptraGetActualCount(pa_data, &npages);
377 if (npages == 0) {
378 l_dnaaDestroy(&daa_locs);
379 return ERROR_INT("no parsable pdf files found", __func__, 1);
380 }
381
382 /* Find the mapping from initial to final object numbers */
383 naa_objs = numaaCreate(npages); /* stores final object numbers */
384 napage = numaCreate(npages); /* stores "Page" object numbers */
385 index = 0;
386 for (i = 0; i < npages; i++) {
387 da = l_dnaaGetDna(daa_locs, i, L_CLONE);
388 nobj = l_dnaGetCount(da);
389 if (i == 0) {
390 numaAddNumber(napage, 4); /* object 4 on first page */
391 na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
392 index = nobj - 1;
393 } else { /* skip the first 3 objects in each file */
394 numaAddNumber(napage, index); /* Page object is first we add */
395 na_objs = numaMakeConstant(0.0, nobj - 1);
396 numaReplaceNumber(na_objs, 3, 3); /* refers to parent of all */
397 for (j = 4; j < nobj - 1; j++)
398 numaSetValue(na_objs, j, index++);
399 }
400 numaaAddNuma(naa_objs, na_objs, L_INSERT);
401 l_dnaDestroy(&da);
402 }
403
404 /* Make the Pages object (#3) */
405 str_pages = generatePagesObjStringPdf(napage);
406
407 /* Build the output */
408 bad = l_byteaCreate(5000);
409 da_outlocs = l_dnaCreate(0); /* locations of all output objects */
410 for (i = 0; i < npages; i++) {
411 bas = (L_BYTEA *)ptraGetPtrToItem(pa_data, i);
412 pdfdata = l_byteaGetData(bas, &size);
413 da_locs = l_dnaaGetDna(daa_locs, i, L_CLONE); /* locs on this page */
414 na_objs = numaaGetNuma(naa_objs, i, L_CLONE); /* obj # on this page */
415 nobj = l_dnaGetCount(da_locs) - 1;
416 da_sizes = l_dnaDiffAdjValues(da_locs); /* object sizes on this page */
417 sizes = l_dnaGetIArray(da_sizes);
418 locs = l_dnaGetIArray(da_locs);
419 if (i == 0) {
420 l_byteaAppendData(bad, pdfdata, sizes[0]);
421 l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
422 l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
423 l_byteaAppendString(bad, str_pages);
424 for (j = 0; j < 4; j++)
425 l_dnaAddNumber(da_outlocs, locs[j]);
426 }
427 for (j = 4; j < nobj; j++) {
428 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
429 bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
430 bat2 = substituteObjectNumbers(bat1, na_objs);
431 data = l_byteaGetData(bat2, &size);
432 l_byteaAppendData(bad, data, size);
433 l_byteaDestroy(&bat1);
434 l_byteaDestroy(&bat2);
435 }
436 if (i == npages - 1) /* last one */
437 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
438 LEPT_FREE(sizes);
439 LEPT_FREE(locs);
440 l_dnaDestroy(&da_locs);
441 numaDestroy(&na_objs);
442 l_dnaDestroy(&da_sizes);
443 }
444
445 /* Add the trailer */
446 str_trailer = makeTrailerStringPdf(da_outlocs);
447 l_byteaAppendString(bad, str_trailer);
448
449 /* Transfer the output data */
450 *pdata = l_byteaCopyData(bad, pnbytes);
451 l_byteaDestroy(&bad);
452
453#if DEBUG_MULTIPAGE
454 lept_stderr("******** object mapper **********");
455 numaaWriteStream(stderr, naa_objs);
456
457 lept_stderr("******** Page object numbers ***********");
458 numaWriteStderr(napage);
459
460 lept_stderr("******** Pages object ***********\n");
461 lept_stderr("%s\n", str_pages);
462#endif /* DEBUG_MULTIPAGE */
463
464 numaDestroy(&napage);
465 numaaDestroy(&naa_objs);
466 l_dnaDestroy(&da_outlocs);
467 l_dnaaDestroy(&daa_locs);
468 LEPT_FREE(str_pages);
469 LEPT_FREE(str_trailer);
470 return 0;
471}
472
473
474/*---------------------------------------------------------------------*
475 * Convert tiff multipage to pdf file *
476 *---------------------------------------------------------------------*/
490l_ok
491convertTiffMultipageToPdf(const char *filein,
492 const char *fileout)
493{
494l_int32 istiff;
495PIXA *pixa;
496FILE *fp;
497
498 if ((fp = fopenReadStream(filein)) == NULL)
499 return ERROR_INT_1("file not found", filein, __func__, 1);
500 istiff = fileFormatIsTiff(fp);
501 fclose(fp);
502 if (!istiff)
503 return ERROR_INT_1("file not tiff format", filein, __func__, 1);
504
505 pixa = pixaReadMultipageTiff(filein);
506 pixaConvertToPdf(pixa, 0, 1.0, 0, 0, "weasel2", fileout);
507 pixaDestroy(&pixa);
508 return 0;
509}
510
511
512/*---------------------------------------------------------------------*
513 * CID-based operations *
514 *---------------------------------------------------------------------*/
542l_ok
543l_generateCIDataForPdf(const char *fname,
544 PIX *pix,
545 l_int32 quality,
546 L_COMP_DATA **pcid)
547{
548l_int32 format, type;
549L_COMP_DATA *cid;
550PIX *pixt;
551
552 if (!pcid)
553 return ERROR_INT("&cid not defined", __func__, 1);
554 *pcid = cid = NULL;
555 if (!fname && !pix)
556 return ERROR_INT("neither fname nor pix are defined", __func__, 1);
557
558 /* If a compressed file is given that is not 'stdin', see if we
559 * can generate the pdf output without transcoding. */
560 if (fname && strcmp(fname, "-") != 0 && strcmp(fname, "stdin") != 0) {
561 findFileFormat(fname, &format);
562 if (format == IFF_UNKNOWN)
563 L_WARNING("file %s format is unknown\n", __func__, fname);
564 if (format == IFF_PS || format == IFF_LPDF) {
565 L_ERROR("file %s is unsupported format %d\n",
566 __func__, fname, format);
567 return 1;
568 }
569 if (format == IFF_JFIF_JPEG) {
570 cid = l_generateJpegData(fname, 0);
571 } else if (format == IFF_JP2) {
572 cid = l_generateJp2kData(fname);
573 } else if (format == IFF_PNG) {
574 cid = l_generateFlateDataPdf(fname, pix);
575 }
576 }
577
578 /* Otherwise, use the pix to generate the pdf output */
579 if (!cid) {
580 if (!pix)
581 pixt = pixRead(fname);
582 else
583 pixt = pixClone(pix);
584 if (!pixt)
585 return ERROR_INT("pixt not made", __func__, 1);
586 if (selectDefaultPdfEncoding(pixt, &type)) {
587 pixDestroy(&pixt);
588 return 1;
589 }
590 pixGenerateCIData(pixt, type, quality, 0, &cid);
591 pixDestroy(&pixt);
592 if (!cid)
593 return ERROR_INT("cid not made from pix", __func__, 1);
594 }
595 *pcid = cid;
596 return 0;
597}
598
599
624l_ok
625l_generateCIData(const char *fname,
626 l_int32 type,
627 l_int32 quality,
628 l_int32 ascii85,
629 L_COMP_DATA **pcid)
630{
631l_int32 format, d, bps, spp, iscmap;
632L_COMP_DATA *cid;
633PIX *pix;
634
635 if (!pcid)
636 return ERROR_INT("&cid not defined", __func__, 1);
637 *pcid = NULL;
638 if (!fname)
639 return ERROR_INT("fname not defined", __func__, 1);
640 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
641 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE)
642 return ERROR_INT("invalid conversion type", __func__, 1);
643 if (ascii85 != 0 && ascii85 != 1)
644 return ERROR_INT("invalid ascii85", __func__, 1);
645
646 /* Sanity check on requested encoding */
647 pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);
648 d = bps * spp;
649 if (d == 24) d = 32;
650 if (iscmap && type != L_FLATE_ENCODE) {
651 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
652 type = L_FLATE_ENCODE;
653 } else if (d < 8 && type == L_JPEG_ENCODE) {
654 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
655 type = L_FLATE_ENCODE;
656 } else if (d < 8 && type == L_JP2K_ENCODE) {
657 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
658 type = L_FLATE_ENCODE;
659 } else if (d > 1 && type == L_G4_ENCODE) {
660 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
661 type = L_FLATE_ENCODE;
662 }
663
664 if (type == L_JPEG_ENCODE) {
665 if (format == IFF_JFIF_JPEG) { /* do not transcode */
666 cid = l_generateJpegData(fname, ascii85);
667 } else {
668 if ((pix = pixRead(fname)) == NULL)
669 return ERROR_INT("pix not returned for JPEG", __func__, 1);
670 cid = pixGenerateJpegData(pix, ascii85, quality);
671 pixDestroy(&pix);
672 }
673 if (!cid)
674 return ERROR_INT("jpeg data not made", __func__, 1);
675 } else if (type == L_JP2K_ENCODE) {
676 if (format == IFF_JP2) { /* do not transcode */
677 cid = l_generateJp2kData(fname);
678 } else {
679 if ((pix = pixRead(fname)) == NULL)
680 return ERROR_INT("pix not returned for JP2K", __func__, 1);
681 cid = pixGenerateJp2kData(pix, quality);
682 pixDestroy(&pix);
683 }
684 if (!cid)
685 return ERROR_INT("jp2k data not made", __func__, 1);
686 } else if (type == L_G4_ENCODE) {
687 if ((pix = pixRead(fname)) == NULL)
688 return ERROR_INT("pix not returned for G4", __func__, 1);
689 cid = pixGenerateG4Data(pix, ascii85);
690 pixDestroy(&pix);
691 if (!cid)
692 return ERROR_INT("g4 data not made", __func__, 1);
693 } else if (type == L_FLATE_ENCODE) {
694 if ((cid = l_generateFlateData(fname, ascii85)) == NULL)
695 return ERROR_INT("flate data not made", __func__, 1);
696 } else {
697 return ERROR_INT("invalid conversion type", __func__, 1);
698 }
699 *pcid = cid;
700
701 return 0;
702}
703
704
705/*---------------------------------------------------------------------*
706 * Low-level CID-based operations *
707 *---------------------------------------------------------------------*/
727l_generateFlateDataPdf(const char *fname,
728 PIX *pixs)
729{
730l_uint8 *pngcomp = NULL; /* entire PNG compressed file */
731l_uint8 *datacomp = NULL; /* gzipped raster data */
732l_uint8 *cmapdata = NULL; /* uncompressed colormap */
733char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
734l_uint32 i, j, n;
735l_int32 format, interlaced;
736l_int32 ncolors; /* in colormap */
737l_int32 bps; /* bits/sample: usually 8 */
738l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb; 4-rgba */
739l_int32 w, h, cmapflag;
740l_int32 xres, yres;
741size_t nbytescomp = 0, nbytespng = 0;
742FILE *fp;
743L_COMP_DATA *cid;
744PIX *pix;
745PIXCMAP *cmap = NULL;
746
747 if (!fname)
748 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
749
750 findFileFormat(fname, &format);
751 spp = 0; /* init to spp != 4 if not png */
752 interlaced = 0; /* initialize to no interlacing */
753 bps = 0; /* initialize to a nonsense value */
754 if (format == IFF_PNG) {
755 isPngInterlaced(fname, &interlaced);
756 if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
757 return (L_COMP_DATA *)ERROR_PTR("bad png input", __func__, NULL);
758 }
759
760 /* PDF is capable of inlining some types of PNG files, but not all
761 of them. We need to transcode anything with interlacing, an
762 alpha channel, or 1 bpp (which would otherwise be photo-inverted).
763
764 Note: any PNG image file with an alpha channel is converted on
765 reading to RGBA (spp == 4). This includes the (gray + alpha) format
766 with spp == 2. Because of the conversion, readHeaderPng() gives
767 spp = 2, whereas pixGetSpp() gives spp = 4 on the converted pix. */
768 if (format != IFF_PNG ||
769 (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
770 { /* lgtm+ analyzer needed the logic expanded */
771 if (!pixs)
772 pix = pixRead(fname);
773 else
774 pix = pixClone(pixs);
775 if (!pix)
776 return (L_COMP_DATA *)ERROR_PTR("pix not made", __func__, NULL);
777 cid = pixGenerateFlateData(pix, 0);
778 pixDestroy(&pix);
779 return cid;
780 }
781
782 /* It's png. Generate the pdf data without transcoding.
783 * Implementation by Jeff Breidenbach.
784 * First, read the metadata */
785 if ((fp = fopenReadStream(fname)) == NULL)
786 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
787 fname, __func__, NULL);
788 freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
789 fgetPngResolution(fp, &xres, &yres);
790 fclose(fp);
791
792 /* We get pdf corruption when inlining the data from 16 bpp png. */
793 if (bps == 16)
794 return l_generateFlateData(fname, 0);
795
796 /* Read the entire png file */
797 if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
798 return (L_COMP_DATA *)ERROR_PTR_1("unable to read file",
799 fname, __func__, NULL);
800
801 /* Extract flate data, copying portions of it to memory, including
802 * the predictor information in a byte at the beginning of each
803 * raster line. The flate data makes up the vast majority of
804 * the png file, so after extraction we expect datacomp to
805 * be nearly full (i.e., nbytescomp will be only slightly less
806 * than nbytespng). Also extract the colormap if present. */
807 if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
808 LEPT_FREE(pngcomp);
809 return (L_COMP_DATA *)ERROR_PTR("unable to allocate memory",
810 __func__, NULL);
811 }
812
813 /* Parse the png file. Each chunk consists of:
814 * length: 4 bytes
815 * name: 4 bytes (e.g., "IDAT")
816 * data: n bytes
817 * CRC: 4 bytes
818 * Start at the beginning of the data section of the first chunk,
819 * byte 16, because the png file begins with 8 bytes of header,
820 * followed by the first 8 bytes of the first chunk
821 * (length and name). On each loop, increment by 12 bytes to
822 * skip over the CRC, length and name of the next chunk. */
823 for (i = 16; i < nbytespng; i += 12) { /* do each successive chunk */
824 /* Get the chunk length */
825 n = pngcomp[i - 8] << 24;
826 n += pngcomp[i - 7] << 16;
827 n += pngcomp[i - 6] << 8;
828 n += pngcomp[i - 5] << 0;
829 if (n >= nbytespng - i) { /* "n + i" can overflow */
830 LEPT_FREE(pngcomp);
831 LEPT_FREE(datacomp);
832 pixcmapDestroy(&cmap);
833 L_ERROR("invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
834 i, n, nbytespng);
835 return NULL;
836 }
837
838 /* Is it a data chunk? */
839 if (memcmp(pngcomp + i - 4, "IDAT", 4) == 0) {
840 memcpy(datacomp + nbytescomp, pngcomp + i, n);
841 nbytescomp += n;
842 }
843
844 /* Is it a palette chunk? */
845 if (cmapflag && !cmap &&
846 memcmp(pngcomp + i - 4, "PLTE", 4) == 0) {
847 if ((n / 3) > (1 << bps)) {
848 LEPT_FREE(pngcomp);
849 LEPT_FREE(datacomp);
850 pixcmapDestroy(&cmap);
851 L_ERROR("invalid png: i = %d, n = %d, cmapsize = %d\n",
852 __func__, i, n, (1 << bps));
853 return NULL;
854 }
855 cmap = pixcmapCreate(bps);
856 for (j = i; j < i + n; j += 3) {
857 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
858 pngcomp[j + 2]);
859 }
860 }
861 i += n; /* move to the end of the data chunk */
862 }
863 LEPT_FREE(pngcomp);
864
865 if (nbytescomp == 0) {
866 LEPT_FREE(datacomp);
867 pixcmapDestroy(&cmap);
868 return (L_COMP_DATA *)ERROR_PTR("invalid PNG file", __func__, NULL);
869 }
870
871 /* Extract and encode the colormap data as hexascii */
872 ncolors = 0;
873 if (cmap) {
874 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
875 pixcmapDestroy(&cmap);
876 if (!cmapdata) {
877 LEPT_FREE(datacomp);
878 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
879 __func__, NULL);
880 }
881 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
882 LEPT_FREE(cmapdata);
883 }
884
885 /* Note that this is the only situation where the predictor
886 * field of the CID is set to 1. Adobe's predictor values on
887 * p. 76 of pdf_reference_1-7.pdf give 1 for no predictor and
888 * 10-14 for inline predictors, the specifics of which are
889 * ignored by the pdf interpreter, which just needs to know that
890 * the first byte on each compressed scanline is some predictor
891 * whose type can be inferred from the byte itself. */
892 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
893 cid->datacomp = datacomp;
894 cid->type = L_FLATE_ENCODE;
895 cid->cmapdatahex = cmapdatahex;
896 cid->nbytescomp = nbytescomp;
897 cid->ncolors = ncolors;
898 cid->predictor = TRUE;
899 cid->w = w;
900 cid->h = h;
901 cid->bps = bps;
902 cid->spp = spp;
903 cid->res = xres;
904 return cid;
905}
906
907
925l_generateJpegData(const char *fname,
926 l_int32 ascii85flag)
927{
928char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
929l_uint8 *data = NULL;
930l_int32 w, h, xres, yres, bps, spp;
931size_t nbytes, nbytes85;
932L_COMP_DATA *cid;
933FILE *fp;
934
935 if (!fname)
936 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
937
938 if (ascii85flag != 0 && ascii85flag != 1)
939 return (L_COMP_DATA *)ERROR_PTR("wrong ascii85flags", __func__, NULL);
940
941 /* Read the metadata */
942 if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
943 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
944 bps = 8;
945 if ((fp = fopenReadStream(fname)) == NULL)
946 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
947 fname, __func__, NULL);
948 fgetJpegResolution(fp, &xres, &yres);
949 fclose(fp);
950
951 /* Read the entire jpeg file. The returned jpeg data in memory
952 * starts with ffd8 and ends with ffd9 */
953 if ((data = l_binaryRead(fname, &nbytes)) == NULL)
954 return (L_COMP_DATA *)ERROR_PTR_1("data not extracted",
955 fname, __func__, NULL);
956
957 /* Optionally, encode the compressed data */
958 if (ascii85flag == 1) {
959 data85 = encodeAscii85(data, nbytes, &nbytes85);
960 LEPT_FREE(data);
961 if (!data85)
962 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
963 else
964 data85[nbytes85 - 1] = '\0'; /* remove the newline */
965 }
966
967 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
968 if (ascii85flag == 0) {
969 cid->datacomp = data;
970 } else { /* ascii85 */
971 cid->data85 = data85;
972 cid->nbytes85 = nbytes85;
973 }
974 cid->type = L_JPEG_ENCODE;
975 cid->nbytescomp = nbytes;
976 cid->w = w;
977 cid->h = h;
978 cid->bps = bps;
979 cid->spp = spp;
980 cid->res = xres;
981 return cid;
982}
983
984
1002 size_t nbytes,
1003 l_int32 ascii85flag)
1004{
1005char *data85 = NULL; /* ascii85 encoded jpeg compressed file */
1006l_int32 w, h, xres, yres, bps, spp;
1007size_t nbytes85;
1008L_COMP_DATA *cid;
1009
1010 if (!data)
1011 return (L_COMP_DATA *)ERROR_PTR("data not defined", __func__, NULL);
1012
1013 /* Read the metadata */
1014 if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
1015 LEPT_FREE(data);
1016 return (L_COMP_DATA *)ERROR_PTR("bad jpeg metadata", __func__, NULL);
1017 }
1018 bps = 8;
1019 readResolutionMemJpeg(data, nbytes, &xres, &yres);
1020
1021 /* Optionally, encode the compressed data */
1022 if (ascii85flag == 1) {
1023 data85 = encodeAscii85(data, nbytes, &nbytes85);
1024 LEPT_FREE(data);
1025 if (!data85)
1026 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1027 else
1028 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1029 }
1030
1031 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1032 if (ascii85flag == 0) {
1033 cid->datacomp = data;
1034 } else { /* ascii85 */
1035 cid->data85 = data85;
1036 cid->nbytes85 = nbytes85;
1037 }
1038 cid->type = L_JPEG_ENCODE;
1039 cid->nbytescomp = nbytes;
1040 cid->w = w;
1041 cid->h = h;
1042 cid->bps = bps;
1043 cid->spp = spp;
1044 cid->res = xres;
1045 return cid;
1046}
1047
1048
1060static L_COMP_DATA *
1061l_generateJp2kData(const char *fname)
1062{
1063l_int32 w, h, bps, spp, xres, yres;
1064size_t nbytes;
1065L_COMP_DATA *cid;
1066FILE *fp;
1067
1068 if (!fname)
1069 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1070
1071 if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
1072 return (L_COMP_DATA *)ERROR_PTR("bad jp2k metadata", __func__, NULL);
1073
1074 /* The returned jp2k data in memory is the entire jp2k file */
1075 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1076 if ((cid->datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
1077 l_CIDataDestroy(&cid);
1078 return (L_COMP_DATA *)ERROR_PTR("data not extracted", __func__, NULL);
1079 }
1080
1081 xres = yres = 0;
1082 if ((fp = fopenReadStream(fname)) != NULL) {
1083 fgetJp2kResolution(fp, &xres, &yres);
1084 fclose(fp);
1085 }
1086 cid->type = L_JP2K_ENCODE;
1087 cid->nbytescomp = nbytes;
1088 cid->w = w;
1089 cid->h = h;
1090 cid->bps = bps;
1091 cid->spp = spp;
1092 cid->res = xres;
1093 return cid;
1094}
1095
1096
1113l_generateG4Data(const char *fname,
1114 l_int32 ascii85flag)
1115{
1116l_uint8 *datacomp = NULL; /* g4 compressed raster data */
1117char *data85 = NULL; /* ascii85 encoded g4 compressed data */
1118l_int32 w, h, xres, yres, npages;
1119l_int32 minisblack; /* TRUE or FALSE */
1120size_t nbytes85, nbytescomp;
1121L_COMP_DATA *cid;
1122FILE *fp;
1123
1124 if (!fname)
1125 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1126
1127 /* Make sure this is a single page tiff file */
1128 if ((fp = fopenReadStream(fname)) == NULL)
1129 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
1130 fname, __func__, NULL);
1131 tiffGetCount(fp, &npages);
1132 fclose(fp);
1133 if (npages != 1) {
1134 L_ERROR(" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname);
1135 return NULL;
1136 }
1137
1138 /* Read the resolution */
1139 if ((fp = fopenReadStream(fname)) == NULL)
1140 return (L_COMP_DATA *)ERROR_PTR_1("stream not opened",
1141 fname, __func__, NULL);
1142 getTiffResolution(fp, &xres, &yres);
1143 fclose(fp);
1144
1145 /* The returned ccitt g4 data in memory is the block of
1146 * bytes in the tiff file, starting after 8 bytes and
1147 * ending before the directory. */
1148 if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
1149 &w, &h, &minisblack)) {
1150 return (L_COMP_DATA *)ERROR_PTR_1("datacomp not extracted",
1151 fname, __func__, NULL);
1152 }
1153
1154 /* Optionally, encode the compressed data */
1155 if (ascii85flag == 1) {
1156 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1157 LEPT_FREE(datacomp);
1158 if (!data85)
1159 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1160 else
1161 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1162 }
1163
1164 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1165 if (ascii85flag == 0) {
1166 cid->datacomp = datacomp;
1167 } else { /* ascii85 */
1168 cid->data85 = data85;
1169 cid->nbytes85 = nbytes85;
1170 }
1171 cid->type = L_G4_ENCODE;
1172 cid->nbytescomp = nbytescomp;
1173 cid->w = w;
1174 cid->h = h;
1175 cid->bps = 1;
1176 cid->spp = 1;
1177 cid->minisblack = minisblack;
1178 cid->res = xres;
1179 return cid;
1180}
1181
1182
1203l_ok
1205 l_int32 type,
1206 l_int32 quality,
1207 l_int32 ascii85,
1208 L_COMP_DATA **pcid)
1209{
1210l_int32 w, h, d, maxAsp;
1211PIXCMAP *cmap;
1212
1213 if (!pcid)
1214 return ERROR_INT("&cid not defined", __func__, 1);
1215 *pcid = NULL;
1216 if (!pixs)
1217 return ERROR_INT("pixs not defined", __func__, 1);
1218 if (type != L_G4_ENCODE && type != L_JPEG_ENCODE &&
1219 type != L_FLATE_ENCODE && type != L_JP2K_ENCODE) {
1220 selectDefaultPdfEncoding(pixs, &type);
1221 }
1222 if (ascii85 != 0 && ascii85 != 1)
1223 return ERROR_INT("invalid ascii85", __func__, 1);
1224 pixGetDimensions(pixs, &w, &h, NULL);
1225 if (w == 0 || h == 0)
1226 return ERROR_INT("invalid w or h", __func__, 1);
1227 maxAsp = L_MAX(w / h, h / w);
1228 if (maxAsp > 10)
1229 return ERROR_INT("max asperity > 10", __func__, 1);
1230
1231 /* Conditionally modify the encoding type if libz is
1232 * available and the requested library is missing. */
1233#if defined(HAVE_LIBZ)
1234# if !defined(HAVE_LIBJPEG)
1235 if (type == L_JPEG_ENCODE) {
1236 L_WARNING("no libjpeg; using flate encoding\n", __func__);
1237 type = L_FLATE_ENCODE;
1238 }
1239# endif /* !defined(HAVE_LIBJPEG) */
1240# if !defined(HAVE_LIBJP2K)
1241 if (type == L_JP2K_ENCODE) {
1242 L_WARNING("no libjp2k; using flate encoding\n", __func__);
1243 type = L_FLATE_ENCODE;
1244 }
1245# endif /* !defined(HAVE_LIBJP2K) */
1246# if !defined(HAVE_LIBTIFF)
1247 if (type == L_G4_ENCODE) {
1248 L_WARNING("no libtiff; using flate encoding\n", __func__);
1249 type = L_FLATE_ENCODE;
1250 }
1251# endif /* !defined(HAVE_LIBTIFF) */
1252#endif /* defined(HAVE_LIBZ) */
1253
1254 /* Sanity check on requested encoding */
1255 d = pixGetDepth(pixs);
1256 cmap = pixGetColormap(pixs);
1257 if (cmap && type != L_FLATE_ENCODE) {
1258 L_WARNING("pixs has cmap; using flate encoding\n", __func__);
1259 type = L_FLATE_ENCODE;
1260 } else if (d < 8 && (type == L_JPEG_ENCODE || type == L_JP2K_ENCODE)) {
1261 L_WARNING("pixs has < 8 bpp; using flate encoding\n", __func__);
1262 type = L_FLATE_ENCODE;
1263 } else if (d > 1 && type == L_G4_ENCODE) {
1264 L_WARNING("pixs has > 1 bpp; using flate encoding\n", __func__);
1265 type = L_FLATE_ENCODE;
1266 }
1267
1268 if (type == L_JPEG_ENCODE) {
1269 if ((*pcid = pixGenerateJpegData(pixs, ascii85, quality)) == NULL)
1270 return ERROR_INT("jpeg data not made", __func__, 1);
1271 } else if (type == L_JP2K_ENCODE) {
1272 if ((*pcid = pixGenerateJp2kData(pixs, quality)) == NULL)
1273 return ERROR_INT("jp2k data not made", __func__, 1);
1274 } else if (type == L_G4_ENCODE) {
1275 if ((*pcid = pixGenerateG4Data(pixs, ascii85)) == NULL)
1276 return ERROR_INT("g4 data not made", __func__, 1);
1277 } else { /* type == L_FLATE_ENCODE */
1278 if ((*pcid = pixGenerateFlateData(pixs, ascii85)) == NULL)
1279 return ERROR_INT("flate data not made", __func__, 1);
1280 }
1281 return 0;
1282}
1283
1284
1306l_generateFlateData(const char *fname,
1307 l_int32 ascii85flag)
1308{
1309L_COMP_DATA *cid;
1310PIX *pixs;
1311
1312 if (!fname)
1313 return (L_COMP_DATA *)ERROR_PTR("fname not defined", __func__, NULL);
1314
1315 if ((pixs = pixRead(fname)) == NULL)
1316 return (L_COMP_DATA *)ERROR_PTR("pixs not made", __func__, NULL);
1317 cid = pixGenerateFlateData(pixs, ascii85flag);
1318 pixDestroy(&pixs);
1319 return cid;
1320}
1321
1322
1340static L_COMP_DATA *
1342 l_int32 ascii85flag)
1343{
1344l_uint8 *data = NULL; /* uncompressed raster data in required format */
1345l_uint8 *datacomp = NULL; /* gzipped raster data */
1346char *data85 = NULL; /* ascii85 encoded gzipped raster data */
1347l_uint8 *cmapdata = NULL; /* uncompressed colormap */
1348char *cmapdata85 = NULL; /* ascii85 encoded uncompressed colormap */
1349char *cmapdatahex = NULL; /* hex ascii uncompressed colormap */
1350l_int32 ncolors; /* in colormap; not used if cmapdata85 is null */
1351l_int32 bps; /* bits/sample: usually 8 */
1352l_int32 spp; /* samples/pixel: 1-grayscale/cmap); 3-rgb */
1353l_int32 w, h, d, cmapflag;
1354size_t ncmapbytes85 = 0;
1355size_t nbytes85 = 0;
1356size_t nbytes, nbytescomp;
1357L_COMP_DATA *cid;
1358PIX *pixt;
1359PIXCMAP *cmap;
1360
1361 if (!pixs)
1362 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1363
1364 /* Convert the image to one of these 4 types:
1365 * 1 bpp
1366 * 8 bpp, no colormap
1367 * 8 bpp, colormap
1368 * 32 bpp rgb */
1369 pixGetDimensions(pixs, &w, &h, &d);
1370 cmap = pixGetColormap(pixs);
1371 cmapflag = (cmap) ? 1 : 0;
1372 if (d == 2 || d == 4 || d == 16) {
1373 pixt = pixConvertTo8(pixs, cmapflag);
1374 cmap = pixGetColormap(pixt);
1375 d = pixGetDepth(pixt);
1376 } else if (d == 32 && pixGetSpp(pixs) == 4) { /* remove alpha */
1377 pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
1378 } else {
1379 pixt = pixClone(pixs);
1380 }
1381 if (!pixt)
1382 return (L_COMP_DATA *)ERROR_PTR("pixt not made", __func__, NULL);
1383 spp = (d == 32) ? 3 : 1;
1384 bps = (d == 32) ? 8 : d;
1385
1386 /* Extract and encode the colormap data as both ascii85 and hexascii */
1387 ncolors = 0;
1388 if (cmap) {
1389 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
1390 if (!cmapdata) {
1391 pixDestroy(&pixt);
1392 return (L_COMP_DATA *)ERROR_PTR("cmapdata not made",
1393 __func__, NULL);
1394 }
1395
1396 cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
1397 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
1398 LEPT_FREE(cmapdata);
1399 }
1400
1401 /* Extract and compress the raster data */
1402 pixGetRasterData(pixt, &data, &nbytes);
1403 pixDestroy(&pixt);
1404 if (!data) {
1405 LEPT_FREE(cmapdata85);
1406 LEPT_FREE(cmapdatahex);
1407 return (L_COMP_DATA *)ERROR_PTR("data not returned", __func__, NULL);
1408 }
1409 datacomp = zlibCompress(data, nbytes, &nbytescomp);
1410 LEPT_FREE(data);
1411 if (!datacomp) {
1412 LEPT_FREE(cmapdata85);
1413 LEPT_FREE(cmapdatahex);
1414 return (L_COMP_DATA *)ERROR_PTR("datacomp not made", __func__, NULL);
1415 }
1416
1417 /* Optionally, encode the compressed data */
1418 if (ascii85flag == 1) {
1419 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1420 LEPT_FREE(datacomp);
1421 if (!data85) {
1422 LEPT_FREE(cmapdata85);
1423 LEPT_FREE(cmapdatahex);
1424 return (L_COMP_DATA *)ERROR_PTR("data85 not made", __func__, NULL);
1425 } else {
1426 data85[nbytes85 - 1] = '\0'; /* remove the newline */
1427 }
1428 }
1429
1430 cid = (L_COMP_DATA *)LEPT_CALLOC(1, sizeof(L_COMP_DATA));
1431 if (ascii85flag == 0) {
1432 cid->datacomp = datacomp;
1433 } else { /* ascii85 */
1434 cid->data85 = data85;
1435 cid->nbytes85 = nbytes85;
1436 }
1437 cid->type = L_FLATE_ENCODE;
1438 cid->cmapdatahex = cmapdatahex;
1439 cid->cmapdata85 = cmapdata85;
1440 cid->nbytescomp = nbytescomp;
1441 cid->ncolors = ncolors;
1442 cid->w = w;
1443 cid->h = h;
1444 cid->bps = bps;
1445 cid->spp = spp;
1446 cid->res = pixGetXRes(pixs);
1447 cid->nbytes = nbytes; /* only for debugging */
1448 return cid;
1449}
1450
1451
1468static L_COMP_DATA *
1470 l_int32 ascii85flag,
1471 l_int32 quality)
1472{
1473l_int32 d;
1474char *fname;
1475L_COMP_DATA *cid;
1476
1477 if (!pixs)
1478 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1479 if (pixGetColormap(pixs))
1480 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1481 d = pixGetDepth(pixs);
1482 if (d != 8 && d != 16 && d != 32)
1483 return (L_COMP_DATA *)ERROR_PTR("pixs not 8, 16 or 32 bpp",
1484 __func__, NULL);
1485
1486 /* Compress to a temp jpeg file */
1487 fname = l_makeTempFilename();
1488 if (pixWriteJpeg(fname, pixs, quality, 0)) {
1489 LEPT_FREE(fname);
1490 return NULL;
1491 }
1492
1493 /* Generate the data */
1494 cid = l_generateJpegData(fname, ascii85flag);
1495 if (lept_rmfile(fname) != 0)
1496 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1497 LEPT_FREE(fname);
1498 return cid;
1499}
1500
1501
1516static L_COMP_DATA *
1518 l_int32 quality)
1519{
1520l_int32 d;
1521char *fname;
1522L_COMP_DATA *cid;
1523
1524 if (!pixs)
1525 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1526 if (pixGetColormap(pixs))
1527 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1528 d = pixGetDepth(pixs);
1529 if (d != 8 && d != 32)
1530 return (L_COMP_DATA *)ERROR_PTR("pixs not 8 or 32 bpp", __func__, NULL);
1531
1532 /* Compress to a temp jp2k file */
1533 fname = l_makeTempFilename();
1534 if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
1535 LEPT_FREE(fname);
1536 return NULL;
1537 }
1538
1539 /* Generate the data */
1540 cid = l_generateJp2kData(fname);
1541 if (lept_rmfile(fname) != 0)
1542 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1543 LEPT_FREE(fname);
1544 return cid;
1545}
1546
1547
1562static L_COMP_DATA *
1564 l_int32 ascii85flag)
1565{
1566char *fname;
1567L_COMP_DATA *cid;
1568
1569 if (!pixs)
1570 return (L_COMP_DATA *)ERROR_PTR("pixs not defined", __func__, NULL);
1571 if (pixGetDepth(pixs) != 1)
1572 return (L_COMP_DATA *)ERROR_PTR("pixs not 1 bpp", __func__, NULL);
1573 if (pixGetColormap(pixs))
1574 return (L_COMP_DATA *)ERROR_PTR("pixs has colormap", __func__, NULL);
1575
1576 /* Compress to a temp tiff g4 file */
1577 fname = l_makeTempFilename();
1578 if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
1579 LEPT_FREE(fname);
1580 return NULL;
1581 }
1582
1583 cid = l_generateG4Data(fname, ascii85flag);
1584 if (lept_rmfile(fname) != 0)
1585 L_ERROR("temp file %s was not deleted\n", __func__, fname);
1586 LEPT_FREE(fname);
1587 return cid;
1588}
1589
1590
1606l_ok
1608 const char *title,
1609 l_uint8 **pdata,
1610 size_t *pnbytes)
1611{
1612l_int32 res, ret;
1613l_float32 wpt, hpt;
1614L_PDF_DATA *lpd = NULL;
1615
1616 if (!pdata || !pnbytes)
1617 return ERROR_INT("&data and &nbytes not both defined", __func__, 1);
1618 *pdata = NULL;
1619 *pnbytes = 0;
1620 if (!cid)
1621 return ERROR_INT("cid not defined", __func__, 1);
1622
1623 /* Get media box parameters, in pts */
1624 res = cid->res;
1625 if (res <= 0)
1626 res = DefaultInputRes;
1627 wpt = cid->w * 72.f / res;
1628 hpt = cid->h * 72.f / res;
1629
1630 /* Set up the pdf data struct (lpd) */
1631 if ((lpd = pdfdataCreate(title)) == NULL)
1632 return ERROR_INT("lpd not made", __func__, 1);
1633 ptraAdd(lpd->cida, cid);
1634 lpd->n++;
1635 ptaAddPt(lpd->xy, 0, 0); /* xpt = ypt = 0 */
1636 ptaAddPt(lpd->wh, wpt, hpt);
1637
1638 /* Generate the pdf string and destroy the lpd */
1639 ret = l_generatePdf(pdata, pnbytes, lpd);
1640 pdfdataDestroy(&lpd);
1641 if (ret)
1642 return ERROR_INT("pdf output not made", __func__, 1);
1643 return 0;
1644}
1645
1646
1653void
1655{
1656L_COMP_DATA *cid;
1657
1658 if (pcid == NULL) {
1659 L_WARNING("ptr address is null!\n", __func__);
1660 return;
1661 }
1662 if ((cid = *pcid) == NULL)
1663 return;
1664
1665 if (cid->datacomp) LEPT_FREE(cid->datacomp);
1666 if (cid->data85) LEPT_FREE(cid->data85);
1667 if (cid->cmapdata85) LEPT_FREE(cid->cmapdata85);
1668 if (cid->cmapdatahex) LEPT_FREE(cid->cmapdatahex);
1669 LEPT_FREE(cid);
1670 *pcid = NULL;
1671}
1672
1673
/*---------------------------------------------------------------------*
 *       Helper functions for generating the output pdf string         *
 *---------------------------------------------------------------------*/
1698static l_int32
1699l_generatePdf(l_uint8 **pdata,
1700 size_t *pnbytes,
1701 L_PDF_DATA *lpd)
1702{
1703 if (!pdata)
1704 return ERROR_INT("&data not defined", __func__, 1);
1705 *pdata = NULL;
1706 if (!pnbytes)
1707 return ERROR_INT("&nbytes not defined", __func__, 1);
1708 *pnbytes = 0;
1709 if (!lpd)
1710 return ERROR_INT("lpd not defined", __func__, 1);
1711
1712 generateFixedStringsPdf(lpd);
1713 generateMediaboxPdf(lpd);
1714 generatePageStringPdf(lpd);
1715 generateContentStringPdf(lpd);
1716 generatePreXStringsPdf(lpd);
1717 generateColormapStringsPdf(lpd);
1718 generateTrailerPdf(lpd);
1719 return generateOutputDataPdf(pdata, pnbytes, lpd);
1720}
1721
1722
1723static void
1724generateFixedStringsPdf(L_PDF_DATA *lpd)
1725{
1726char buf[L_SMALLBUF];
1727char *version, *datestr;
1728SARRAY *sa;
1729
1730 /* Accumulate data for the header and objects 1-3 */
1731 lpd->id = stringNew("%PDF-1.5\n");
1732 l_dnaAddNumber(lpd->objsize, strlen(lpd->id));
1733
1734 lpd->obj1 = stringNew("1 0 obj\n"
1735 "<<\n"
1736 "/Type /Catalog\n"
1737 "/Pages 3 0 R\n"
1738 ">>\n"
1739 "endobj\n");
1740 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj1));
1741
1742 sa = sarrayCreate(0);
1743 sarrayAddString(sa, "2 0 obj\n"
1744 "<<\n", L_COPY);
1745 if (var_WRITE_DATE_AND_VERSION) {
1746 datestr = l_getFormattedDate();
1747 snprintf(buf, sizeof(buf), "/CreationDate (D:%s)\n", datestr);
1748 sarrayAddString(sa, buf, L_COPY);
1749 LEPT_FREE(datestr);
1750 version = getLeptonicaVersion();
1751 snprintf(buf, sizeof(buf),
1752 "/Producer (leptonica: %s)\n", version);
1753 LEPT_FREE(version);
1754 } else {
1755 snprintf(buf, sizeof(buf), "/Producer (leptonica)\n");
1756 }
1757 sarrayAddString(sa, buf, L_COPY);
1758 if (lpd->title) {
1759 char *hexstr;
1760 if ((hexstr = generateEscapeString(lpd->title)) != NULL) {
1761 snprintf(buf, sizeof(buf), "/Title %s\n", hexstr);
1762 sarrayAddString(sa, buf, L_COPY);
1763 } else {
1764 L_ERROR("title string is not ascii\n", __func__);
1765 }
1766 LEPT_FREE(hexstr);
1767 }
1768 sarrayAddString(sa, ">>\n"
1769 "endobj\n", L_COPY);
1770 lpd->obj2 = sarrayToString(sa, 0);
1771 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj2));
1772 sarrayDestroy(&sa);
1773
1774 lpd->obj3 = stringNew("3 0 obj\n"
1775 "<<\n"
1776 "/Type /Pages\n"
1777 "/Kids [ 4 0 R ]\n"
1778 "/Count 1\n"
1779 ">>\n");
1780 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj3));
1781
1782 /* Do the post-datastream string */
1783 lpd->poststream = stringNew("\n"
1784 "endstream\n"
1785 "endobj\n");
1786}
1787
1788
1806static char *
1807generateEscapeString(const char *str)
1808{
1809char smallbuf[8];
1810char *buffer;
1811l_int32 i, nchar, buflen;
1812
1813 if (!str)
1814 return (char *)ERROR_PTR("str not defined", __func__, NULL);
1815 nchar = strlen(str);
1816 for (i = 0; i < nchar; i++) {
1817 if (str[i] < 0)
1818 return (char *)ERROR_PTR("str not all ascii", __func__, NULL);
1819 }
1820
1821 buflen = 4 * nchar + 10;
1822 buffer = (char *)LEPT_CALLOC(buflen, sizeof(char));
1823 stringCat(buffer, buflen, "<feff");
1824 for (i = 0; i < nchar; i++) {
1825 snprintf(smallbuf, sizeof(smallbuf), "%04x", str[i]);
1826 stringCat(buffer, buflen, smallbuf);
1827 }
1828 stringCat(buffer, buflen, ">");
1829 return buffer;
1830}
1831
1832
1833static void
1834generateMediaboxPdf(L_PDF_DATA *lpd)
1835{
1836l_int32 i;
1837l_float32 xpt, ypt, wpt, hpt, maxx, maxy;
1838
1839 /* First get the full extent of all the images.
1840 * This is the mediabox, in pts. */
1841 maxx = maxy = 0;
1842 for (i = 0; i < lpd->n; i++) {
1843 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1844 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1845 maxx = L_MAX(maxx, xpt + wpt);
1846 maxy = L_MAX(maxy, ypt + hpt);
1847 }
1848
1849 lpd->mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),
1850 (l_int32)(maxy + 0.5));
1851
1852 /* ypt is in standard image coordinates: the location of
1853 * the UL image corner with respect to the UL media box corner.
1854 * Rewrite each ypt for PostScript coordinates: the location of
1855 * the LL image corner with respect to the LL media box corner. */
1856 for (i = 0; i < lpd->n; i++) {
1857 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1858 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1859 ptaSetPt(lpd->xy, i, xpt, maxy - ypt - hpt);
1860 }
1861}
1862
1863
1864static l_int32
1865generatePageStringPdf(L_PDF_DATA *lpd)
1866{
1867char *buf;
1868char *xstr;
1869l_int32 bufsize, i, wpt, hpt;
1870SARRAY *sa;
1871
1872 /* Allocate 1000 bytes for the boilerplate text, and
1873 * 50 bytes for each reference to an image in the
1874 * ProcSet array. */
1875 bufsize = 1000 + 50 * lpd->n;
1876 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1877 return ERROR_INT("calloc fail for buf", __func__, 1);
1878
1879 boxGetGeometry(lpd->mediabox, NULL, NULL, &wpt, &hpt);
1880 sa = sarrayCreate(lpd->n);
1881 for (i = 0; i < lpd->n; i++) {
1882 snprintf(buf, bufsize, "/Im%d %d 0 R ", i + 1, 6 + i);
1883 sarrayAddString(sa, buf, L_COPY);
1884 }
1885 xstr = sarrayToString(sa, 0);
1886 sarrayDestroy(&sa);
1887 if (!xstr) {
1888 LEPT_FREE(buf);
1889 return ERROR_INT("xstr not made", __func__, 1);
1890 }
1891
1892 snprintf(buf, bufsize, "4 0 obj\n"
1893 "<<\n"
1894 "/Type /Page\n"
1895 "/Parent 3 0 R\n"
1896 "/MediaBox [%d %d %d %d]\n"
1897 "/Contents 5 0 R\n"
1898 "/Resources\n"
1899 "<<\n"
1900 "/XObject << %s >>\n"
1901 "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
1902 ">>\n"
1903 ">>\n"
1904 "endobj\n",
1905 0, 0, wpt, hpt, xstr);
1906
1907 lpd->obj4 = stringNew(buf);
1908 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj4));
1909 sarrayDestroy(&sa);
1910 LEPT_FREE(buf);
1911 LEPT_FREE(xstr);
1912 return 0;
1913}
1914
1915
1916static l_int32
1917generateContentStringPdf(L_PDF_DATA *lpd)
1918{
1919char *buf;
1920char *cstr;
1921l_int32 i, bufsize;
1922l_float32 xpt, ypt, wpt, hpt;
1923SARRAY *sa;
1924
1925 bufsize = 1000 + 200 * lpd->n;
1926 if ((buf = (char *)LEPT_CALLOC(bufsize, sizeof(char))) == NULL)
1927 return ERROR_INT("calloc fail for buf", __func__, 1);
1928
1929 sa = sarrayCreate(lpd->n);
1930 for (i = 0; i < lpd->n; i++) {
1931 ptaGetPt(lpd->xy, i, &xpt, &ypt);
1932 ptaGetPt(lpd->wh, i, &wpt, &hpt);
1933 snprintf(buf, bufsize,
1934 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
1935 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
1936 sarrayAddString(sa, buf, L_COPY);
1937 }
1938 cstr = sarrayToString(sa, 0);
1939 sarrayDestroy(&sa);
1940 if (!cstr) {
1941 LEPT_FREE(buf);
1942 return ERROR_INT("cstr not made", __func__, 1);
1943 }
1944
1945 snprintf(buf, bufsize, "5 0 obj\n"
1946 "<< /Length %d >>\n"
1947 "stream\n"
1948 "%s"
1949 "endstream\n"
1950 "endobj\n",
1951 (l_int32)strlen(cstr), cstr);
1952
1953 lpd->obj5 = stringNew(buf);
1954 l_dnaAddNumber(lpd->objsize, strlen(lpd->obj5));
1955 sarrayDestroy(&sa);
1956 LEPT_FREE(buf);
1957 LEPT_FREE(cstr);
1958 return 0;
1959}
1960
1961
1962static l_int32
1963generatePreXStringsPdf(L_PDF_DATA *lpd)
1964{
1965char buff[256];
1966char buf[L_BIGBUF];
1967char *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;
1968l_int32 i, cmindex;
1969L_COMP_DATA *cid;
1970SARRAY *sa;
1971
1972 sa = lpd->saprex;
1973 cmindex = 6 + lpd->n; /* starting value */
1974 for (i = 0; i < lpd->n; i++) {
1975 pstr = cstr = NULL;
1976 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
1977 return ERROR_INT("cid not found", __func__, 1);
1978
1979 if (cid->type == L_G4_ENCODE) {
1980 if (var_WRITE_G4_IMAGE_MASK) {
1981 cstr = stringNew("/ImageMask true\n"
1982 "/ColorSpace /DeviceGray");
1983 } else {
1984 cstr = stringNew("/ColorSpace /DeviceGray");
1985 }
1986 bstr = stringNew("/BitsPerComponent 1\n"
1987 "/Interpolate true");
1988 /* Note: the reversal is deliberate. The BlackIs1 flag
1989 * is misleadingly named: it says whether to invert the
1990 * image on decoding because the black pixels are 0,
1991 * not whether the black pixels are 1! The default for
1992 * BlackIs1 is "false", which means "don't invert because
1993 * black is 1." Yikes. */
1994 photometry = (cid->minisblack) ? stringNew("true")
1995 : stringNew("false");
1996 snprintf(buff, sizeof(buff),
1997 "/Filter /CCITTFaxDecode\n"
1998 "/DecodeParms\n"
1999 "<<\n"
2000 "/BlackIs1 %s\n"
2001 "/K -1\n"
2002 "/Columns %d\n"
2003 ">>", photometry, cid->w);
2004 fstr = stringNew(buff);
2005 LEPT_FREE(photometry);
2006 } else if (cid->type == L_JPEG_ENCODE) {
2007 if (cid->spp == 1)
2008 cstr = stringNew("/ColorSpace /DeviceGray");
2009 else if (cid->spp == 3)
2010 cstr = stringNew("/ColorSpace /DeviceRGB");
2011 else if (cid->spp == 4) /* pdf supports cmyk */
2012 cstr = stringNew("/ColorSpace /DeviceCMYK");
2013 else
2014 L_ERROR("in jpeg: spp != 1, 3 or 4\n", __func__);
2015 bstr = stringNew("/BitsPerComponent 8");
2016 fstr = stringNew("/Filter /DCTDecode");
2017 } else if (cid->type == L_JP2K_ENCODE) {
2018 if (cid->spp == 1)
2019 cstr = stringNew("/ColorSpace /DeviceGray");
2020 else if (cid->spp == 3)
2021 cstr = stringNew("/ColorSpace /DeviceRGB");
2022 else
2023 L_ERROR("in jp2k: spp != 1 && spp != 3\n", __func__);
2024 bstr = stringNew("/BitsPerComponent 8");
2025 fstr = stringNew("/Filter /JPXDecode");
2026 } else { /* type == L_FLATE_ENCODE */
2027 if (cid->ncolors > 0) { /* cmapped */
2028 snprintf(buff, sizeof(buff), "/ColorSpace %d 0 R", cmindex++);
2029 cstr = stringNew(buff);
2030 } else {
2031 if (cid->spp == 1 && cid->bps == 1)
2032 cstr = stringNew("/ColorSpace /DeviceGray\n"
2033 "/Decode [1 0]");
2034 else if (cid->spp == 1) /* 8 bpp */
2035 cstr = stringNew("/ColorSpace /DeviceGray");
2036 else if (cid->spp == 3)
2037 cstr = stringNew("/ColorSpace /DeviceRGB");
2038 else
2039 L_ERROR("unknown colorspace: spp = %d\n",
2040 __func__, cid->spp);
2041 }
2042 snprintf(buff, sizeof(buff), "/BitsPerComponent %d", cid->bps);
2043 bstr = stringNew(buff);
2044 fstr = stringNew("/Filter /FlateDecode");
2045 if (cid->predictor == TRUE) {
2046 snprintf(buff, sizeof(buff),
2047 "/DecodeParms\n"
2048 "<<\n"
2049 " /Columns %d\n"
2050 " /Predictor 14\n"
2051 " /Colors %d\n"
2052 " /BitsPerComponent %d\n"
2053 ">>\n", cid->w, cid->spp, cid->bps);
2054 pstr = stringNew(buff);
2055 }
2056 }
2057 if (!pstr) /* no decode parameters */
2058 pstr = stringNew("");
2059
2060 snprintf(buf, sizeof(buf),
2061 "%d 0 obj\n"
2062 "<<\n"
2063 "/Length %zu\n"
2064 "/Subtype /Image\n"
2065 "%s\n" /* colorspace */
2066 "/Width %d\n"
2067 "/Height %d\n"
2068 "%s\n" /* bits/component */
2069 "%s\n" /* filter */
2070 "%s" /* decode parms; can be empty */
2071 ">>\n"
2072 "stream\n",
2073 6 + i, cid->nbytescomp, cstr,
2074 cid->w, cid->h, bstr, fstr, pstr);
2075 xstr = stringNew(buf);
2076 sarrayAddString(sa, xstr, L_INSERT);
2077 l_dnaAddNumber(lpd->objsize,
2078 strlen(xstr) + cid->nbytescomp + strlen(lpd->poststream));
2079 LEPT_FREE(cstr);
2080 LEPT_FREE(bstr);
2081 LEPT_FREE(fstr);
2082 LEPT_FREE(pstr);
2083 }
2084
2085 return 0;
2086}
2087
2088
2089static l_int32
2090generateColormapStringsPdf(L_PDF_DATA *lpd)
2091{
2092char buf[L_BIGBUF];
2093char *cmstr;
2094l_int32 i, cmindex, ncmap;
2095L_COMP_DATA *cid;
2096SARRAY *sa;
2097
2098 /* In our canonical format, we have 5 objects, followed
2099 * by n XObjects, followed by m colormaps, so the index of
2100 * the first colormap object is 6 + n. */
2101 sa = lpd->sacmap;
2102 cmindex = 6 + lpd->n; /* starting value */
2103 ncmap = 0;
2104 for (i = 0; i < lpd->n; i++) {
2105 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
2106 return ERROR_INT("cid not found", __func__, 1);
2107 if (cid->ncolors == 0) continue;
2108
2109 ncmap++;
2110 snprintf(buf, sizeof(buf), "%d 0 obj\n"
2111 "[ /Indexed /DeviceRGB\n"
2112 "%d\n"
2113 "%s\n"
2114 "]\n"
2115 "endobj\n",
2116 cmindex, cid->ncolors - 1, cid->cmapdatahex);
2117 cmindex++;
2118 cmstr = stringNew(buf);
2119 l_dnaAddNumber(lpd->objsize, strlen(cmstr));
2120 sarrayAddString(sa, cmstr, L_INSERT);
2121 }
2122
2123 lpd->ncmap = ncmap;
2124 return 0;
2125}
2126
2127
2128static void
2129generateTrailerPdf(L_PDF_DATA *lpd)
2130{
2131l_int32 i, n, size, linestart;
2132L_DNA *daloc, *dasize;
2133
2134 /* Let nobj be the number of numbered objects. These numbered
2135 * objects are indexed by their pdf number in arrays naloc[]
2136 * and nasize[]. The 0th object is the 9 byte header. Then
2137 * the number of objects in nasize, which includes the header,
2138 * is n = nobj + 1. The array naloc[] has n + 1 elements,
2139 * because it includes as the last element the starting
2140 * location of xref. The indexing of these objects, their
2141 * starting locations and sizes are:
2142 *
2143 * Object number Starting location Size
2144 * ------------- ----------------- --------------
2145 * 0 daloc[0] = 0 dasize[0] = 9
2146 * 1 daloc[1] = 9 dasize[1] = 49
2147 * n daloc[n] dasize[n]
2148 * xref daloc[n+1]
2149 *
2150 * We first generate daloc.
2151 */
2152 dasize = lpd->objsize;
2153 daloc = lpd->objloc;
2154 linestart = 0;
2155 l_dnaAddNumber(daloc, linestart); /* header */
2156 n = l_dnaGetCount(dasize);
2157 for (i = 0; i < n; i++) {
2158 l_dnaGetIValue(dasize, i, &size);
2159 linestart += size;
2160 l_dnaAddNumber(daloc, linestart);
2161 }
2162 l_dnaGetIValue(daloc, n, &lpd->xrefloc); /* save it */
2163
2164 /* Now make the actual trailer string */
2165 lpd->trailer = makeTrailerStringPdf(daloc);
2166}
2167
2168
2169static char *
2170makeTrailerStringPdf(L_DNA *daloc)
2171{
2172char *outstr;
2173char buf[L_BIGBUF];
2174l_int32 i, n, linestart, xrefloc;
2175SARRAY *sa;
2176
2177 if (!daloc)
2178 return (char *)ERROR_PTR("daloc not defined", __func__, NULL);
2179 n = l_dnaGetCount(daloc) - 1; /* numbered objects + 1 (yes, +1) */
2180
2181 sa = sarrayCreate(0);
2182 snprintf(buf, sizeof(buf), "xref\n"
2183 "0 %d\n"
2184 "0000000000 65535 f \n", n);
2185 sarrayAddString(sa, buf, L_COPY);
2186 for (i = 1; i < n; i++) {
2187 l_dnaGetIValue(daloc, i, &linestart);
2188 snprintf(buf, sizeof(buf), "%010d 00000 n \n", linestart);
2189 sarrayAddString(sa, buf, L_COPY);
2190 }
2191
2192 l_dnaGetIValue(daloc, n, &xrefloc);
2193 snprintf(buf, sizeof(buf), "trailer\n"
2194 "<<\n"
2195 "/Size %d\n"
2196 "/Root 1 0 R\n"
2197 "/Info 2 0 R\n"
2198 ">>\n"
2199 "startxref\n"
2200 "%d\n"
2201 "%%%%EOF\n", n, xrefloc);
2202 sarrayAddString(sa, buf, L_COPY);
2203 outstr = sarrayToString(sa, 0);
2204 sarrayDestroy(&sa);
2205 return outstr;
2206}
2207
2208
2222static l_int32
2224 size_t *pnbytes,
2225 L_PDF_DATA *lpd)
2226{
2227char *str;
2228l_uint8 *data;
2229l_int32 nimages, i, len;
2230l_int32 *sizes, *locs;
2231size_t nbytes;
2232L_COMP_DATA *cid;
2233
2234 if (!pdata)
2235 return ERROR_INT("&data not defined", __func__, 1);
2236 *pdata = NULL;
2237 if (!pnbytes)
2238 return ERROR_INT("&nbytes not defined", __func__, 1);
2239 nbytes = lpd->xrefloc + strlen(lpd->trailer);
2240 *pnbytes = nbytes;
2241 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes, sizeof(l_uint8))) == NULL)
2242 return ERROR_INT("calloc fail for data", __func__, 1);
2243 *pdata = data;
2244
2245 sizes = l_dnaGetIArray(lpd->objsize);
2246 locs = l_dnaGetIArray(lpd->objloc);
2247 memcpy(data, lpd->id, sizes[0]);
2248 memcpy(data + locs[1], lpd->obj1, sizes[1]);
2249 memcpy(data + locs[2], lpd->obj2, sizes[2]);
2250 memcpy(data + locs[3], lpd->obj3, sizes[3]);
2251 memcpy(data + locs[4], lpd->obj4, sizes[4]);
2252 memcpy(data + locs[5], lpd->obj5, sizes[5]);
2253
2254 /* Each image has 3 parts: variable preamble, the compressed
2255 * data stream, and the fixed poststream. */
2256 nimages = lpd->n;
2257 for (i = 0; i < nimages; i++) {
2258 if ((cid = pdfdataGetCid(lpd, i)) == NULL) { /* should not happen */
2259 LEPT_FREE(sizes);
2260 LEPT_FREE(locs);
2261 return ERROR_INT("cid not found", __func__, 1);
2262 }
2263 str = sarrayGetString(lpd->saprex, i, L_NOCOPY);
2264 len = strlen(str);
2265 memcpy(data + locs[6 + i], str, len);
2266 memcpy(data + locs[6 + i] + len,
2267 cid->datacomp, cid->nbytescomp);
2268 memcpy(data + locs[6 + i] + len + cid->nbytescomp,
2269 lpd->poststream, strlen(lpd->poststream));
2270 }
2271
2272 /* Each colormap is simply a stored string */
2273 for (i = 0; i < lpd->ncmap; i++) {
2274 str = sarrayGetString(lpd->sacmap, i, L_NOCOPY);
2275 memcpy(data + locs[6 + nimages + i], str, strlen(str));
2276 }
2277
2278 /* And finally the trailer */
2279 memcpy(data + lpd->xrefloc, lpd->trailer, strlen(lpd->trailer));
2280 LEPT_FREE(sizes);
2281 LEPT_FREE(locs);
2282 return 0;
2283}
2284
2285
/*---------------------------------------------------------------------*
 *         Helper functions for generating multipage pdf output        *
 *---------------------------------------------------------------------*/
2296static l_int32
2298 L_DNA **pda)
2299{
2300char *str;
2301l_uint8 nl = '\n';
2302l_uint8 *data;
2303l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
2304size_t size;
2305L_DNA *da, *daobj, *daxref;
2306SARRAY *sa;
2307
2308 if (!pda)
2309 return ERROR_INT("&da not defined", __func__, 1);
2310 *pda = NULL;
2311 if (!bas)
2312 return ERROR_INT("bas not defined", __func__, 1);
2313 data = l_byteaGetData(bas, &size);
2314 if (memcmp(data, "%PDF-1.", 7) != 0)
2315 return ERROR_INT("PDF header signature not found", __func__, 1);
2316
2317 /* Search for "startxref" starting 50 bytes from the EOF */
2318 start = 0;
2319 if (size > 50)
2320 start = size - 50;
2321 arrayFindSequence(data + start, size - start,
2322 (l_uint8 *)"startxref\n", 10, &loc, &found);
2323 if (!found)
2324 return ERROR_INT("startxref not found!", __func__, 1);
2325 if (sscanf((char *)(data + start + loc + 10), "%d\n", &xrefloc) != 1)
2326 return ERROR_INT("xrefloc not found!", __func__, 1);
2327 if (xrefloc < 0 || xrefloc >= size)
2328 return ERROR_INT("invalid xrefloc!", __func__, 1);
2329 sa = sarrayCreateLinesFromString((char *)(data + xrefloc), 0);
2330 str = sarrayGetString(sa, 1, L_NOCOPY);
2331 if ((sscanf(str, "0 %d", &nobj)) != 1) {
2332 sarrayDestroy(&sa);
2333 return ERROR_INT("nobj not found", __func__, 1);
2334 }
2335
2336 /* Get starting locations. The numa index is the
2337 * object number. loc[0] is the ID; loc[nobj + 1] is xrefloc. */
2338 da = l_dnaCreate(nobj + 1);
2339 *pda = da;
2340 for (i = 0; i < nobj; i++) {
2341 str = sarrayGetString(sa, i + 2, L_NOCOPY);
2342 sscanf(str, "%d", &startloc);
2343 l_dnaAddNumber(da, startloc);
2344 }
2345 l_dnaAddNumber(da, xrefloc);
2346
2347#if DEBUG_MULTIPAGE
2348 lept_stderr("************** Trailer string ************\n");
2349 lept_stderr("xrefloc = %d", xrefloc);
2350 sarrayWriteStderr(sa);
2351
2352 lept_stderr("************** Object locations ************");
2353 l_dnaWriteStderr(da);
2354#endif /* DEBUG_MULTIPAGE */
2355 sarrayDestroy(&sa);
2356
2357 /* Verify correct parsing */
2358 trailer_ok = TRUE;
2359 for (i = 1; i < nobj; i++) {
2360 l_dnaGetIValue(da, i, &startloc);
2361 if ((sscanf((char *)(data + startloc), "%d 0 obj", &objno)) != 1) {
2362 L_ERROR("bad trailer for object %d\n", __func__, i);
2363 trailer_ok = FALSE;
2364 break;
2365 }
2366 }
2367
2368 /* If the trailer is broken, reconstruct the correct obj locations */
2369 if (!trailer_ok) {
2370 L_INFO("rebuilding pdf trailer\n", __func__);
2371 l_dnaEmpty(da);
2372 l_dnaAddNumber(da, 0);
2373 l_byteaFindEachSequence(bas, (l_uint8 *)" 0 obj\n", 7, &daobj);
2374 nobj = l_dnaGetCount(daobj);
2375 for (i = 0; i < nobj; i++) {
2376 l_dnaGetIValue(daobj, i, &loc);
2377 for (j = loc - 1; j > 0; j--) {
2378 if (data[j] == nl)
2379 break;
2380 }
2381 l_dnaAddNumber(da, j + 1);
2382 }
2383 l_byteaFindEachSequence(bas, (l_uint8 *)"xref", 4, &daxref);
2384 l_dnaGetIValue(daxref, 0, &loc);
2385 l_dnaAddNumber(da, loc);
2386 l_dnaDestroy(&daobj);
2387 l_dnaDestroy(&daxref);
2388 }
2389
2390 return 0;
2391}
2392
2393
2394static char *
2395generatePagesObjStringPdf(NUMA *napage)
2396{
2397char *str;
2398char *buf;
2399l_int32 i, n, index, bufsize;
2400SARRAY *sa;
2401
2402 if (!napage)
2403 return (char *)ERROR_PTR("napage not defined", __func__, NULL);
2404
2405 n = numaGetCount(napage);
2406 bufsize = 100 + 16 * n; /* large enough to hold the output string */
2407 buf = (char *)LEPT_CALLOC(bufsize, sizeof(char));
2408 sa = sarrayCreate(n);
2409 for (i = 0; i < n; i++) {
2410 numaGetIValue(napage, i, &index);
2411 snprintf(buf, bufsize, " %d 0 R ", index);
2412 sarrayAddString(sa, buf, L_COPY);
2413 }
2414
2415 str = sarrayToString(sa, 0);
2416 snprintf(buf, bufsize - 1, "3 0 obj\n"
2417 "<<\n"
2418 "/Type /Pages\n"
2419 "/Kids [%s]\n"
2420 "/Count %d\n"
2421 ">>\n"
2422 "endobj\n",
2423 str, n);
2424 sarrayDestroy(&sa);
2425 LEPT_FREE(str);
2426 return buf;
2427}
2428
2429
2447static L_BYTEA *
2449 NUMA *na_objs)
2450{
2451l_uint8 space = ' ';
2452l_uint8 *datas;
2453l_uint8 buf[32]; /* only needs to hold one integer in ascii format */
2454l_int32 start, nrepl, i, j, nobjs, objin, objout, found;
2455l_int32 *objs, *matches;
2456size_t size;
2457L_BYTEA *bad;
2458L_DNA *da_match;
2459
2460 if (!bas)
2461 return (L_BYTEA *)ERROR_PTR("bas not defined", __func__, NULL);
2462 if (!na_objs)
2463 return (L_BYTEA *)ERROR_PTR("na_objs not defined", __func__, NULL);
2464
2465 datas = l_byteaGetData(bas, &size);
2466 bad = l_byteaCreate(100);
2467 objs = numaGetIArray(na_objs); /* object number mapper */
2468 nobjs = numaGetCount(na_objs); /* use for sanity checking */
2469
2470 /* Substitute the object number on the first line */
2471 sscanf((char *)datas, "%d", &objin);
2472 if (objin < 0 || objin >= nobjs) {
2473 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2474 LEPT_FREE(objs);
2475 return bad;
2476 }
2477 objout = objs[objin];
2478 snprintf((char *)buf, 32, "%d", objout);
2479 l_byteaAppendString(bad, (char *)buf);
2480
2481 /* Find the set of matching locations for object references */
2482 arrayFindSequence(datas, size, &space, 1, &start, &found);
2483 da_match = arrayFindEachSequence(datas, size, (l_uint8 *)" 0 R", 4);
2484 if (!da_match) {
2485 l_byteaAppendData(bad, datas + start, size - start);
2486 LEPT_FREE(objs);
2487 return bad;
2488 }
2489
2490 /* Substitute all the object reference numbers */
2491 nrepl = l_dnaGetCount(da_match);
2492 matches = l_dnaGetIArray(da_match);
2493 for (i = 0; i < nrepl; i++) {
2494 /* Find the first space before the object number */
2495 for (j = matches[i] - 1; j > 0; j--) {
2496 if (datas[j] == space)
2497 break;
2498 }
2499 /* Copy bytes from 'start' up to the object number */
2500 l_byteaAppendData(bad, datas + start, j - start + 1);
2501 sscanf((char *)(datas + j + 1), "%d", &objin);
2502 if (objin < 0 || objin >= nobjs) {
2503 L_ERROR("index %d into array of size %d\n", __func__, objin, nobjs);
2504 LEPT_FREE(objs);
2505 LEPT_FREE(matches);
2506 l_dnaDestroy(&da_match);
2507 return bad;
2508 }
2509 objout = objs[objin];
2510 snprintf((char *)buf, 32, "%d", objout);
2511 l_byteaAppendString(bad, (char *)buf);
2512 start = matches[i];
2513 }
2514 l_byteaAppendData(bad, datas + start, size - start);
2515
2516 LEPT_FREE(objs);
2517 LEPT_FREE(matches);
2518 l_dnaDestroy(&da_match);
2519 return bad;
2520}
2521
2522
2523/*---------------------------------------------------------------------*
2524 * Create/destroy/access pdf data *
2525 *---------------------------------------------------------------------*/
2526static L_PDF_DATA *
2527pdfdataCreate(const char *title)
2528{
2529L_PDF_DATA *lpd;
2530
2531 lpd = (L_PDF_DATA *)LEPT_CALLOC(1, sizeof(L_PDF_DATA));
2532 if (title) lpd->title = stringNew(title);
2533 lpd->cida = ptraCreate(10);
2534 lpd->xy = ptaCreate(10);
2535 lpd->wh = ptaCreate(10);
2536 lpd->saprex = sarrayCreate(10);
2537 lpd->sacmap = sarrayCreate(10);
2538 lpd->objsize = l_dnaCreate(20);
2539 lpd->objloc = l_dnaCreate(20);
2540 return lpd;
2541}
2542
2543static void
2544pdfdataDestroy(L_PDF_DATA **plpd)
2545{
2546l_int32 i;
2547L_COMP_DATA *cid;
2548L_PDF_DATA *lpd;
2549
2550 if (plpd== NULL) {
2551 L_WARNING("ptr address is null!\n", __func__);
2552 return;
2553 }
2554 if ((lpd = *plpd) == NULL)
2555 return;
2556
2557 if (lpd->title) LEPT_FREE(lpd->title);
2558 for (i = 0; i < lpd->n; i++) {
2559 cid = (L_COMP_DATA *)ptraRemove(lpd->cida, i, L_NO_COMPACTION);
2560 l_CIDataDestroy(&cid);
2561 }
2562
2563 ptraDestroy(&lpd->cida, 0, 0);
2564 if (lpd->id) LEPT_FREE(lpd->id);
2565 if (lpd->obj1) LEPT_FREE(lpd->obj1);
2566 if (lpd->obj2) LEPT_FREE(lpd->obj2);
2567 if (lpd->obj3) LEPT_FREE(lpd->obj3);
2568 if (lpd->obj4) LEPT_FREE(lpd->obj4);
2569 if (lpd->obj5) LEPT_FREE(lpd->obj5);
2570 if (lpd->poststream) LEPT_FREE(lpd->poststream);
2571 if (lpd->trailer) LEPT_FREE(lpd->trailer);
2572 if (lpd->xy) ptaDestroy(&lpd->xy);
2573 if (lpd->wh) ptaDestroy(&lpd->wh);
2574 if (lpd->mediabox) boxDestroy(&lpd->mediabox);
2575 if (lpd->saprex) sarrayDestroy(&lpd->saprex);
2576 if (lpd->sacmap) sarrayDestroy(&lpd->sacmap);
2577 if (lpd->objsize) l_dnaDestroy(&lpd->objsize);
2578 if (lpd->objloc) l_dnaDestroy(&lpd->objloc);
2579 LEPT_FREE(lpd);
2580 *plpd = NULL;
2581}
2582
2583
2584static L_COMP_DATA *
2585pdfdataGetCid(L_PDF_DATA *lpd,
2586 l_int32 index)
2587{
2588 if (!lpd)
2589 return (L_COMP_DATA *)ERROR_PTR("lpd not defined", __func__, NULL);
2590 if (index < 0 || index >= lpd->n)
2591 return (L_COMP_DATA *)ERROR_PTR("invalid image index", __func__, NULL);
2592
2593 return (L_COMP_DATA *)ptraGetPtrToItem(lpd->cida, index);
2594}
2595
2596
2597/*---------------------------------------------------------------------*
2598 * Find number of pages in a pdf *
2599 *---------------------------------------------------------------------*/
2618l_ok
2619getPdfPageCount(const char *fname,
2620 l_int32 *pnpages)
2621{
2622l_uint8 *data;
2623l_int32 format, loc, ret, npages, found;
2624size_t nread;
2625
2626 if (!pnpages)
2627 return ERROR_INT("&npages not defined", __func__, 1);
2628 *pnpages = 0;
2629 if (!fname)
2630 return ERROR_INT("fname not defined", __func__, 1);
2631
2632 /* Make sure this a pdf file */
2633 findFileFormat(fname, &format);
2634 if (format != IFF_LPDF)
2635 return ERROR_INT("file is not pdf", __func__, 1);
2636
2637 /* Read 10000 bytes from the beginning of the file */
2638 if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))
2639 == NULL)
2640 return ERROR_INT("partial data not read", __func__, 1);
2641
2642 /* Find the location of the first instance of "/Count".
2643 * If it is not found, try reading the entire file and
2644 * looking again. */
2645 arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
2646 strlen("/Count"), &loc, &found);
2647 if (!found) {
2648 lept_stderr("Reading entire file looking for '/Count'\n");
2649 LEPT_FREE(data);
2650 if ((data = l_binaryRead(fname, &nread)) == NULL)
2651 return ERROR_INT("full data not read", __func__, 1);
2652 arrayFindSequence(data, nread, (const l_uint8 *)"/Count",
2653 strlen("/Count"), &loc, &found);
2654 if (!found) {
2655 LEPT_FREE(data);
2656 L_WARNING("/Count not found\n", __func__);
2657 return 0;
2658 }
2659 }
2660
2661 /* Unlikely: make sure we can read the count field */
2662 if (nread - loc < 12) { /* haven't read enough to capture page count */
2663 LEPT_FREE(data);
2664 return ERROR_INT("data may not include page count field", __func__, 1);
2665 }
2666
2667 /* Read the page count; if not found, puts garbage in npages */
2668 ret = sscanf((char *)&data[loc], "/Count %d", &npages);
2669 LEPT_FREE(data);
2670 if (ret != 1)
2671 return ERROR_INT("npages not found", __func__, 1);
2672 *pnpages = npages;
2673/* lept_stderr("bytes read = %d, loc = %d, npages = %d\n",
2674 nread, loc, *pnpages); */
2675 return 0;
2676}
2677
2678
2679/*---------------------------------------------------------------------*
2680 * Find widths and heights of pages and media boxes in a pdf *
2681 *---------------------------------------------------------------------*/
2702l_ok
2703getPdfPageSizes(const char *fname,
2704 NUMA **pnaw,
2705 NUMA **pnah,
2706 l_int32 *pmedw,
2707 l_int32 *pmedh)
2708{
2709l_uint8 *data;
2710l_int32 i, nw, nh, format, ret, loc, width, height;
2711l_float32 fval;
2712size_t nread;
2713L_DNA *dnaw; /* width locations */
2714L_DNA *dnah; /* height locations */
2715NUMA *naw; /* widths */
2716NUMA *nah; /* heights */
2717
2718 if (pnaw) *pnaw = NULL;
2719 if (pnah) *pnah = NULL;
2720 if (pmedw) *pmedw = 0;
2721 if (pmedh) *pmedh = 0;
2722 if (!pnaw && !pnah && !pmedw && !pmedh)
2723 return ERROR_INT("no output requested", __func__, 1);
2724 if (!fname)
2725 return ERROR_INT("fname not defined", __func__, 1);
2726
2727 /* Make sure this a pdf file */
2728 findFileFormat(fname, &format);
2729 if (format != IFF_LPDF)
2730 return ERROR_INT("file is not pdf", __func__, 1);
2731
2732 /* Read the file into memory and find all locations of
2733 * '/Width' and '/Height' */
2734 if ((data = l_binaryRead(fname, &nread)) == NULL)
2735 return ERROR_INT("full data not read", __func__, 1);
2736 dnaw = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Width",
2737 strlen("/Width"));
2738 dnah = arrayFindEachSequence(data, nread, (const l_uint8 *)"/Height",
2739 strlen("/Height"));
2740 if (!dnaw)
2741 L_WARNING("unable to find widths\n", __func__);
2742 if (!dnah)
2743 L_WARNING("unable to find heights\n", __func__);
2744 if (!dnaw && !dnah) {
2745 LEPT_FREE(data);
2746 L_WARNING("no fields found\n", __func__);
2747 return 0;
2748 }
2749
2750 /* Find the page widths and heights */
2751 nw = l_dnaGetCount(dnaw);
2752 naw = numaCreate(nw);
2753 for (i = 0; i < nw; i++) {
2754 l_dnaGetIValue(dnaw, i, &loc);
2755 ret = sscanf((char *)&data[loc], "/Width %d", &width);
2756 if (ret != 1) {
2757 L_ERROR("width not found for item %d at loc %d\n",
2758 __func__, i, loc);
2759 continue;
2760 }
2761 numaAddNumber(naw, width);
2762 }
2763 nh = l_dnaGetCount(dnah);
2764 nah = numaCreate(nh);
2765 for (i = 0; i < nh; i++) {
2766 l_dnaGetIValue(dnah, i, &loc);
2767 ret = sscanf((char *)&data[loc], "/Height %d", &height);
2768 if (ret != 1) {
2769 L_ERROR("height not found for item %d at loc %d\n",
2770 __func__, i, loc);
2771 continue;
2772 }
2773 numaAddNumber(nah, height);
2774 }
2775
2776 LEPT_FREE(data);
2777 l_dnaDestroy(&dnaw);
2778 l_dnaDestroy(&dnah);
2779 if (pmedw) {
2780 numaGetMedian(naw, &fval);
2781 *pmedw = lept_roundftoi(fval);
2782 }
2783 if (pnaw)
2784 *pnaw = naw;
2785 else
2786 numaDestroy(&naw);
2787 if (pmedh) {
2788 numaGetMedian(nah, &fval);
2789 *pmedh = lept_roundftoi(fval);
2790 }
2791 if (pnah)
2792 *pnah = nah;
2793 else
2794 numaDestroy(&nah);
2795 return 0;
2796}
2797
2798
2825l_ok
2826getPdfMediaBoxSizes(const char *fname,
2827 NUMA **pnaw,
2828 NUMA **pnah,
2829 l_int32 *pmedw,
2830 l_int32 *pmedh)
2831{
2832l_uint8 *data;
2833l_int32 i, n, format, ret, loc;
2834l_float32 fval, ignore1, ignore2, w, h;
2835size_t nread;
2836L_DNA *dna; /* mediabox locations */
2837NUMA *naw; /* mediabox widths */
2838NUMA *nah; /* mediabox heights */
2839
2840 if (pnaw) *pnaw = NULL;
2841 if (pnah) *pnah = NULL;
2842 if (pmedw) *pmedw = 0;
2843 if (pmedh) *pmedh = 0;
2844 if (!pnaw && !pnah && !pmedw && !pmedh)
2845 return ERROR_INT("no output requested", __func__, 1);
2846 if (!fname)
2847 return ERROR_INT("fname not defined", __func__, 1);
2848
2849 /* Make sure this a pdf file */
2850 findFileFormat(fname, &format);
2851 if (format != IFF_LPDF)
2852 return ERROR_INT("file is not pdf", __func__, 1);
2853
2854 /* Read the file into memory and find all locations of '/MediaBox' */
2855 if ((data = l_binaryRead(fname, &nread)) == NULL)
2856 return ERROR_INT("full data not read", __func__, 1);
2857 dna = arrayFindEachSequence(data, nread, (const l_uint8 *)"/MediaBox",
2858 strlen("/MediaBox"));
2859 if (!dna) {
2860 LEPT_FREE(data);
2861 L_WARNING("no mediaboxes found\n", __func__);
2862 return 1;
2863 }
2864
2865 /* Find the mediabox widths and heights */
2866 n = l_dnaGetCount(dna);
2867 naw = numaCreate(n);
2868 nah = numaCreate(n);
2869 for (i = 0; i < n; i++) {
2870 l_dnaGetIValue(dna, i, &loc);
2871 ret = sscanf((char *)&data[loc], "/MediaBox [ %f %f %f %f",
2872 &ignore1, &ignore2, &w, &h);
2873 if (ret != 4) {
2874 L_ERROR("mediabox sizes not found for item %d at loc %d\n",
2875 __func__, i, loc);
2876 continue;
2877 }
2878 numaAddNumber(naw, w);
2879 numaAddNumber(nah, h);
2880 }
2881 LEPT_FREE(data);
2882 l_dnaDestroy(&dna);
2883
2884 if (pmedw) {
2885 numaGetMedian(naw, &fval);
2886 *pmedw = lept_roundftoi(fval);
2887 if (*pmedw > 850) lept_stderr("oversize width: %d\n", *pmedw);
2888 }
2889 if (pnaw)
2890 *pnaw = naw;
2891 else
2892 numaDestroy(&naw);
2893 if (pmedh) {
2894 numaGetMedian(nah, &fval);
2895 *pmedh = lept_roundftoi(fval);
2896 if (*pmedh > 850) lept_stderr("oversize height: %d\n", *pmedh);
2897 }
2898 if (pnah)
2899 *pnah = nah;
2900 else
2901 numaDestroy(&nah);
2902 return 0;
2903}
2904
2905
2906/*---------------------------------------------------------------------*
2907 * Find effective resolution of images rendered from a pdf *
2908 *---------------------------------------------------------------------*/
2937l_ok
2938getPdfRendererResolution(const char *infile,
2939 const char *outdir,
2940 l_int32 *pres)
2941{
2942char buf[256];
2943char *tail, *basename, *fname;
2944l_int32 ret, res, medw, medh, medmax, npages, pageno, w, h;
2945SARRAY *sa;
2946
2947 if (!pres)
2948 return ERROR_INT("&res not defined", __func__, 1);
2949 *pres = 300; /* default */
2950
2951#ifdef _WIN32
2952 L_INFO("Requires pdftoppm, so this is disabled on windows.\n"
2953 "Returns default resolution 300 ppi", __func__);
2954 return 0;
2955#endif /* _WIN32 */
2956
2957 if (!LeptDebugOK) {
2958 L_INFO("Running pdftoppm is disabled; "
2959 "use setLeptDebugOK(1) to enable\n",
2960 "returns default resolution 300 ppi\n", __func__);
2961 return 1;
2962 }
2963
2964 if (!infile)
2965 return ERROR_INT("infile not defined", __func__, 1);
2966 if (!outdir)
2967 return ERROR_INT("outdir not defined", __func__, 1);
2968
2969 res = 300; /* default value */
2970 ret = getPdfMediaBoxSizes(infile, NULL, NULL, &medw, &medh);
2971 if (ret == 0) { /* Check for oversize mediaboxes */
2972 lept_stderr("Media Box medians: medw = %d, medh = %d\n", medw, medh);
2973 medmax = L_MAX(medw, medh);
2974 if (medmax > 850) {
2975 res = 300 * ((l_float32)792 / (l_float32)medmax);
2976 lept_stderr(" Oversize media box; use resolution = %d\n", res);
2977 *pres = res;
2978 }
2979 return 0;
2980 }
2981
2982 /* No mediaboxes; render one page and measure the max dimension */
2983 lept_stderr("Media Box dimensions not found\n");
2984 getPdfPageCount(infile, &npages);
2985 pageno = (npages > 0) ? (npages + 1) / 2 : 1;
2986 splitPathAtDirectory(infile, NULL, &tail);
2987 splitPathAtExtension(tail, &basename, NULL);
2988 snprintf(buf, sizeof(buf), "pdftoppm -f %d -l %d -r 72 %s %s/%s",
2989 pageno, pageno, infile, outdir, basename);
2990 LEPT_FREE(tail);
2991 LEPT_FREE(basename);
2992 callSystemDebug(buf); /* pdftoppm */
2993
2994 /* Get the page size */
2995 sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);
2996 fname = sarrayGetString(sa, 0, L_NOCOPY);
2997 pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);
2998 sarrayDestroy(&sa);
2999 if (w > 0 && h > 0) {
3000 res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);
3001 *pres = res;
3002 lept_stderr("Use resolution = %d\n", res);
3003 } else {
3004 L_ERROR("page size not found; assuming res = 300\n", __func__);
3005 }
3006
3007 return 0;
3008}
3009
3010
3011/*---------------------------------------------------------------------*
3012 * Set flags for special modes *
3013 *---------------------------------------------------------------------*/
3028void
3030{
3031 var_WRITE_G4_IMAGE_MASK = flag;
3032}
3033
3034
3048void
3050{
3051 var_WRITE_DATE_AND_VERSION = flag;
3052}
3053
3054/* --------------------------------------------*/
3055#endif /* USE_PDFIO */
3056/* --------------------------------------------*/
@ L_FLATE_ENCODE
Definition imageio.h:161
@ L_G4_ENCODE
Definition imageio.h:160
@ L_JP2K_ENCODE
Definition imageio.h:162
@ L_JPEG_ENCODE
Definition imageio.h:159
@ L_FIRST_IMAGE
Definition imageio.h:208
@ L_LAST_IMAGE
Definition imageio.h:210
void l_CIDataDestroy(L_COMP_DATA **pcid)
l_CIDataDestroy()
Definition pdfio2.c:1654
L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
l_generateJpegDataMem()
Definition pdfio2.c:1001
l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
pixGenerateCIData()
Definition pdfio2.c:1204
l_ok getPdfRendererResolution(const char *infile, const char *outdir, l_int32 *pres)
getPdfRendererResolution()
Definition pdfio2.c:2938
static L_COMP_DATA * pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag)
pixGenerateFlateData()
Definition pdfio2.c:1341
L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag)
l_generateFlateData()
Definition pdfio2.c:1306
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
Definition pdfio2.c:201
L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag)
l_generateJpegData()
Definition pdfio2.c:925
static char * generateEscapeString(const char *str)
generateEscapeString()
Definition pdfio2.c:1807
L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag)
l_generateG4Data()
Definition pdfio2.c:1113
static L_COMP_DATA * pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, l_int32 quality)
pixGenerateJpegData()
Definition pdfio2.c:1469
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
Definition pdfio2.c:3049
static L_BYTEA * substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs)
substituteObjectNumbers()
Definition pdfio2.c:2448
l_ok getPdfMediaBoxSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfMediaBoxSizes()
Definition pdfio2.c:2826
l_ok getPdfPageCount(const char *fname, l_int32 *pnpages)
getPdfPageCount()
Definition pdfio2.c:2619
l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout)
convertTiffMultipageToPdf()
Definition pdfio2.c:491
l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
l_generateCIData()
Definition pdfio2.c:625
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
Definition pdfio2.c:329
static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda)
parseTrailerPdf()
Definition pdfio2.c:2297
static L_COMP_DATA * pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag)
pixGenerateG4Data()
Definition pdfio2.c:1563
l_ok getPdfPageSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfPageSizes()
Definition pdfio2.c:2703
void l_pdfSetG4ImageMask(l_int32 flag)
l_pdfSetG4ImageMask()
Definition pdfio2.c:3029
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
Definition pdfio2.c:1607
static L_COMP_DATA * l_generateJp2kData(const char *fname)
l_generateJp2kData()
Definition pdfio2.c:1061
L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pixs)
l_generateFlateDataPdf()
Definition pdfio2.c:727
static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
l_generatePdf()
Definition pdfio2.c:1699
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
Definition pdfio2.c:543
static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
generateOutputDataPdf()
Definition pdfio2.c:2223
static L_COMP_DATA * pixGenerateJp2kData(PIX *pixs, l_int32 quality)
pixGenerateJp2kData()
Definition pdfio2.c:1517
@ L_COPY
Definition pix.h:505
@ L_CLONE
Definition pix.h:506
@ L_NOCOPY
Definition pix.h:503
@ L_INSERT
Definition pix.h:504
@ L_NO_COMPACTION
Definition ptra.h:79
l_int32 ncolors
Definition imageio.h:190
l_int32 predictor
Definition imageio.h:196
char * cmapdatahex
Definition imageio.h:189
l_uint8 * datacomp
Definition imageio.h:184
size_t nbytescomp
Definition imageio.h:185
l_int32 minisblack
Definition imageio.h:195
char * cmapdata85
Definition imageio.h:188
l_int32 xrefloc
Definition imageio.h:246
char * poststream
Definition imageio.h:237
struct Sarray * saprex
Definition imageio.h:242
struct L_Ptra * cida
Definition imageio.h:230
struct Pta * xy
Definition imageio.h:239
l_int32 ncmap
Definition imageio.h:229
char * obj2
Definition imageio.h:233
char * trailer
Definition imageio.h:238
char * obj1
Definition imageio.h:232
struct Sarray * sacmap
Definition imageio.h:243
l_int32 n
Definition imageio.h:228
struct L_Dna * objsize
Definition imageio.h:244
struct L_Dna * objloc
Definition imageio.h:245
char * title
Definition imageio.h:227
char * id
Definition imageio.h:231
struct Pta * wh
Definition imageio.h:240
char * obj5
Definition imageio.h:236
char * obj4
Definition imageio.h:235
struct Box * mediabox
Definition imageio.h:241
char * obj3
Definition imageio.h:234
Definition ptra.h:54