106#include <config_auto.h>
111#include "allheaders.h"
/* Fallback input resolution. Code later in this file uses it when the
 * image's own resolution value is not positive, and as the divisor in the
 * "* 72.f / res" conversions that produce the point-sized x/y/w/h values. */
118static const l_int32 DefaultInputRes = 300;
128static l_int32
l_generatePdf(l_uint8 **pdata,
size_t *pnbytes,
/* Forward declarations of file-local helpers.
 *
 * The seven generate*Pdf(lpd) helpers below are called one after another,
 * in the order declared here, by the PDF generation code later in this
 * file; each receives the same L_PDF_DATA pointer.
 * NOTE(review): at that call sequence the l_int32 results of the four
 * non-void helpers are not checked -- confirm whether that is intended. */
130static void generateFixedStringsPdf(
L_PDF_DATA *lpd);
132static void generateMediaboxPdf(
L_PDF_DATA *lpd);
133static l_int32 generatePageStringPdf(
L_PDF_DATA *lpd);
134static l_int32 generateContentStringPdf(
L_PDF_DATA *lpd);
135static l_int32 generatePreXStringsPdf(
L_PDF_DATA *lpd);
136static l_int32 generateColormapStringsPdf(
L_PDF_DATA *lpd);
137static void generateTrailerPdf(
L_PDF_DATA *lpd);
/* Builds the trailer text from an array of object locations. Used both for
 * the per-document trailer (stored in lpd->trailer) and for the trailer
 * appended to the concatenated multipage output; callers release the
 * returned string with LEPT_FREE. */
138static char *makeTrailerStringPdf(
L_DNA *daloc);
/* Builds the text of the "3 0 obj" pages object from the page object
 * numbers in napage; returns NULL via ERROR_PTR if napage is not defined. */
143static char *generatePagesObjStringPdf(
NUMA *napage);
/* Allocates an L_PDF_DATA; callers in this file treat a NULL return as
 * failure ("lpd not made"). title may be NULL (it is copied only if set). */
146static L_PDF_DATA *pdfdataCreate(
const char *title);
/* When nonzero, the colorspace string written for the /CCITTFaxDecode
 * image is prefixed with "/ImageMask true"; otherwise only
 * "/ColorSpace /DeviceGray" is written.
 * NOTE(review): the surrounding branch lines are not visible here --
 * confirm the exact condition. */
153static l_int32 var_WRITE_G4_IMAGE_MASK = 1;
/* When nonzero, a "/CreationDate (D:...)" entry and a
 * "/Producer (leptonica: <version>)" entry are added to the strings
 * assembled after "2 0 obj".
 * NOTE(review): the plain "/Producer (leptonica)" string presumably
 * belongs to the else-branch -- confirm. */
155static l_int32 var_WRITE_DATE_AND_VERSION = 1;
/* Size in bytes of the stack buffers declared as char buf[L_SMALLBUF]. */
157#define L_SMALLBUF 256
/* NOTE(review): presumably gates the stderr debug dumps (object mapper,
 * page object numbers, trailer string) in the multipage code; the
 * guarding preprocessor lines are not visible here -- confirm. */
162#define DEBUG_MULTIPAGE 0
213l_int32 pixres, w, h, ret;
214l_float32 xpt, ypt, wpt, hpt;
219 return ERROR_INT(
"&data not defined", __func__, 1);
222 return ERROR_INT(
"&nbytes not defined", __func__, 1);
225 return ERROR_INT(
"pix not defined", __func__, 1);
228 selectDefaultPdfEncoding(pix, &type);
230 if (quality < 0 || quality > 100)
231 return ERROR_INT(
"invalid quality", __func__, 1);
242 return ERROR_INT(
"cid not made", __func__, 1);
251 res = (pixres > 0) ? pixres : DefaultInputRes;
252 xpt = x * 72.f / res;
253 ypt = y * 72.f / res;
254 wpt = w * 72.f / res;
255 hpt = h * 72.f / res;
259 if ((lpd = pdfdataCreate(title)) == NULL)
260 return ERROR_INT(
"lpd not made", __func__, 1);
262 if ((lpd = pdfdataCreate(title)) == NULL)
263 return ERROR_INT(
"lpd not made", __func__, 1);
270 ptraAdd(lpd->
cida, cid);
272 ptaAddPt(lpd->
xy, xpt, ypt);
273 ptaAddPt(lpd->
wh, wpt, hpt);
279 pdfdataDestroy(&lpd);
280 if (plpd) *plpd = NULL;
282 return ERROR_INT(
"pdf output not made", __func__, 1);
334char *fname, *str_pages, *str_trailer;
335l_uint8 *pdfdata, *data;
336l_int32 i, j, index, nobj, npages;
337l_int32 *sizes, *locs;
339L_BYTEA *bas, *bad, *bat1, *bat2;
340L_DNA *da_locs, *da_sizes, *da_outlocs, *da;
342NUMA *na_objs, *napage;
346 return ERROR_INT(
"&data not defined", __func__, 1);
349 return ERROR_INT(
"&nbytes not defined", __func__, 1);
352 return ERROR_INT(
"pa_data not defined", __func__, 1);
356 ptraGetActualCount(pa_data, &npages);
357 daa_locs = l_dnaaCreate(npages);
358 for (i = 0; i < npages; i++) {
359 bas = (
L_BYTEA *)ptraGetPtrToItem(pa_data, i);
362 l_byteaDestroy(&bas);
364 fname = sarrayGetString(sa, i,
L_NOCOPY);
365 L_ERROR(
"can't parse file %s; skipping\n", __func__, fname);
367 L_ERROR(
"can't parse file %d; skipping\n", __func__, i);
370 l_dnaaAddDna(daa_locs, da_locs,
L_INSERT);
375 ptraCompactArray(pa_data);
376 ptraGetActualCount(pa_data, &npages);
378 l_dnaaDestroy(&daa_locs);
379 return ERROR_INT(
"no parsable pdf files found", __func__, 1);
383 naa_objs = numaaCreate(npages);
384 napage = numaCreate(npages);
386 for (i = 0; i < npages; i++) {
387 da = l_dnaaGetDna(daa_locs, i,
L_CLONE);
388 nobj = l_dnaGetCount(da);
390 numaAddNumber(napage, 4);
391 na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
394 numaAddNumber(napage, index);
395 na_objs = numaMakeConstant(0.0, nobj - 1);
396 numaReplaceNumber(na_objs, 3, 3);
397 for (j = 4; j < nobj - 1; j++)
398 numaSetValue(na_objs, j, index++);
400 numaaAddNuma(naa_objs, na_objs,
L_INSERT);
405 str_pages = generatePagesObjStringPdf(napage);
408 bad = l_byteaCreate(5000);
409 da_outlocs = l_dnaCreate(0);
410 for (i = 0; i < npages; i++) {
411 bas = (
L_BYTEA *)ptraGetPtrToItem(pa_data, i);
412 pdfdata = l_byteaGetData(bas, &size);
413 da_locs = l_dnaaGetDna(daa_locs, i,
L_CLONE);
414 na_objs = numaaGetNuma(naa_objs, i,
L_CLONE);
415 nobj = l_dnaGetCount(da_locs) - 1;
416 da_sizes = l_dnaDiffAdjValues(da_locs);
417 sizes = l_dnaGetIArray(da_sizes);
418 locs = l_dnaGetIArray(da_locs);
420 l_byteaAppendData(bad, pdfdata, sizes[0]);
421 l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
422 l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
423 l_byteaAppendString(bad, str_pages);
424 for (j = 0; j < 4; j++)
425 l_dnaAddNumber(da_outlocs, locs[j]);
427 for (j = 4; j < nobj; j++) {
428 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
429 bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
431 data = l_byteaGetData(bat2, &size);
432 l_byteaAppendData(bad, data, size);
433 l_byteaDestroy(&bat1);
434 l_byteaDestroy(&bat2);
437 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
440 l_dnaDestroy(&da_locs);
441 numaDestroy(&na_objs);
442 l_dnaDestroy(&da_sizes);
446 str_trailer = makeTrailerStringPdf(da_outlocs);
447 l_byteaAppendString(bad, str_trailer);
450 *pdata = l_byteaCopyData(bad, pnbytes);
451 l_byteaDestroy(&bad);
454 lept_stderr(
"******** object mapper **********");
455 numaaWriteStream(stderr, naa_objs);
457 lept_stderr(
"******** Page object numbers ***********");
458 numaWriteStderr(napage);
460 lept_stderr(
"******** Pages object ***********\n");
461 lept_stderr(
"%s\n", str_pages);
464 numaDestroy(&napage);
465 numaaDestroy(&naa_objs);
466 l_dnaDestroy(&da_outlocs);
467 l_dnaaDestroy(&daa_locs);
468 LEPT_FREE(str_pages);
469 LEPT_FREE(str_trailer);
498 if ((fp = fopenReadStream(filein)) == NULL)
499 return ERROR_INT_1(
"file not found", filein, __func__, 1);
500 istiff = fileFormatIsTiff(fp);
503 return ERROR_INT_1(
"file not tiff format", filein, __func__, 1);
505 pixa = pixaReadMultipageTiff(filein);
506 pixaConvertToPdf(pixa, 0, 1.0, 0, 0,
"weasel2", fileout);
553 return ERROR_INT(
"&cid not defined", __func__, 1);
556 return ERROR_INT(
"neither fname nor pix are defined", __func__, 1);
560 if (fname && strcmp(fname,
"-") != 0 && strcmp(fname,
"stdin") != 0) {
561 findFileFormat(fname, &format);
562 if (format == IFF_UNKNOWN)
563 L_WARNING(
"file %s format is unknown\n", __func__, fname);
564 if (format == IFF_PS || format == IFF_LPDF) {
565 L_ERROR(
"file %s is unsupported format %d\n",
566 __func__, fname, format);
569 if (format == IFF_JFIF_JPEG) {
571 }
else if (format == IFF_JP2) {
573 }
else if (format == IFF_PNG) {
581 pixt = pixRead(fname);
583 pixt = pixClone(pix);
585 return ERROR_INT(
"pixt not made", __func__, 1);
586 if (selectDefaultPdfEncoding(pixt, &type)) {
593 return ERROR_INT(
"cid not made from pix", __func__, 1);
631l_int32 format, d, bps, spp, iscmap;
636 return ERROR_INT(
"&cid not defined", __func__, 1);
639 return ERROR_INT(
"fname not defined", __func__, 1);
642 return ERROR_INT(
"invalid conversion type", __func__, 1);
643 if (ascii85 != 0 && ascii85 != 1)
644 return ERROR_INT(
"invalid ascii85", __func__, 1);
647 pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);
651 L_WARNING(
"pixs has cmap; using flate encoding\n", __func__);
654 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
657 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
660 L_WARNING(
"pixs has > 1 bpp; using flate encoding\n", __func__);
665 if (format == IFF_JFIF_JPEG) {
668 if ((pix = pixRead(fname)) == NULL)
669 return ERROR_INT(
"pix not returned for JPEG", __func__, 1);
674 return ERROR_INT(
"jpeg data not made", __func__, 1);
676 if (format == IFF_JP2) {
679 if ((pix = pixRead(fname)) == NULL)
680 return ERROR_INT(
"pix not returned for JP2K", __func__, 1);
685 return ERROR_INT(
"jp2k data not made", __func__, 1);
687 if ((pix = pixRead(fname)) == NULL)
688 return ERROR_INT(
"pix not returned for G4", __func__, 1);
692 return ERROR_INT(
"g4 data not made", __func__, 1);
695 return ERROR_INT(
"flate data not made", __func__, 1);
697 return ERROR_INT(
"invalid conversion type", __func__, 1);
730l_uint8 *pngcomp = NULL;
731l_uint8 *datacomp = NULL;
732l_uint8 *cmapdata = NULL;
733char *cmapdatahex = NULL;
735l_int32 format, interlaced;
739l_int32 w, h, cmapflag;
741size_t nbytescomp = 0, nbytespng = 0;
748 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
750 findFileFormat(fname, &format);
754 if (format == IFF_PNG) {
755 isPngInterlaced(fname, &interlaced);
756 if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
757 return (
L_COMP_DATA *)ERROR_PTR(
"bad png input", __func__, NULL);
768 if (format != IFF_PNG ||
769 (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
772 pix = pixRead(fname);
774 pix = pixClone(pixs);
776 return (
L_COMP_DATA *)ERROR_PTR(
"pix not made", __func__, NULL);
785 if ((fp = fopenReadStream(fname)) == NULL)
786 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
787 fname, __func__, NULL);
788 freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
789 fgetPngResolution(fp, &xres, &yres);
797 if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
798 return (
L_COMP_DATA *)ERROR_PTR_1(
"unable to read file",
799 fname, __func__, NULL);
807 if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
809 return (
L_COMP_DATA *)ERROR_PTR(
"unable to allocate memory",
823 for (i = 16; i < nbytespng; i += 12) {
825 n = pngcomp[i - 8] << 24;
826 n += pngcomp[i - 7] << 16;
827 n += pngcomp[i - 6] << 8;
828 n += pngcomp[i - 5] << 0;
829 if (n >= nbytespng - i) {
832 pixcmapDestroy(&cmap);
833 L_ERROR(
"invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
839 if (memcmp(pngcomp + i - 4,
"IDAT", 4) == 0) {
840 memcpy(datacomp + nbytescomp, pngcomp + i, n);
845 if (cmapflag && !cmap &&
846 memcmp(pngcomp + i - 4,
"PLTE", 4) == 0) {
847 if ((n / 3) > (1 << bps)) {
850 pixcmapDestroy(&cmap);
851 L_ERROR(
"invalid png: i = %d, n = %d, cmapsize = %d\n",
852 __func__, i, n, (1 << bps));
855 cmap = pixcmapCreate(bps);
856 for (j = i; j < i + n; j += 3) {
857 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
865 if (nbytescomp == 0) {
867 pixcmapDestroy(&cmap);
868 return (
L_COMP_DATA *)ERROR_PTR(
"invalid PNG file", __func__, NULL);
874 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
875 pixcmapDestroy(&cmap);
878 return (
L_COMP_DATA *)ERROR_PTR(
"cmapdata not made",
881 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
930l_int32 w, h, xres, yres, bps, spp;
931size_t nbytes, nbytes85;
936 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
938 if (ascii85flag != 0 && ascii85flag != 1)
939 return (
L_COMP_DATA *)ERROR_PTR(
"wrong ascii85flags", __func__, NULL);
942 if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
943 return (
L_COMP_DATA *)ERROR_PTR(
"bad jpeg metadata", __func__, NULL);
945 if ((fp = fopenReadStream(fname)) == NULL)
946 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
947 fname, __func__, NULL);
948 fgetJpegResolution(fp, &xres, &yres);
953 if ((data = l_binaryRead(fname, &nbytes)) == NULL)
954 return (
L_COMP_DATA *)ERROR_PTR_1(
"data not extracted",
955 fname, __func__, NULL);
958 if (ascii85flag == 1) {
959 data85 = encodeAscii85(data, nbytes, &nbytes85);
962 return (
L_COMP_DATA *)ERROR_PTR_1(
"data85 not made",
963 fname, __func__, NULL);
965 data85[nbytes85 - 1] =
'\0';
969 if (ascii85flag == 0) {
1004 l_int32 ascii85flag)
1007l_int32 w, h, xres, yres, bps, spp;
1012 return (
L_COMP_DATA *)ERROR_PTR(
"data not defined", __func__, NULL);
1015 if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
1017 return (
L_COMP_DATA *)ERROR_PTR(
"bad jpeg metadata", __func__, NULL);
1020 readResolutionMemJpeg(data, nbytes, &xres, &yres);
1023 if (ascii85flag == 1) {
1024 data85 = encodeAscii85(data, nbytes, &nbytes85);
1027 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
1029 data85[nbytes85 - 1] =
'\0';
1033 if (ascii85flag == 0) {
1064l_int32 w, h, bps, spp, xres, yres;
1070 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1072 if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
1073 return (
L_COMP_DATA *)ERROR_PTR(
"bad jp2k metadata", __func__, NULL);
1077 if ((cid->
datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
1079 return (
L_COMP_DATA *)ERROR_PTR(
"data not extracted", __func__, NULL);
1083 if ((fp = fopenReadStream(fname)) != NULL) {
1084 fgetJp2kResolution(fp, &xres, &yres);
1115 l_int32 ascii85flag)
1117l_uint8 *datacomp = NULL;
1119l_int32 w, h, xres, yres, npages;
1121size_t nbytes85, nbytescomp;
1126 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1129 if ((fp = fopenReadStream(fname)) == NULL)
1130 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
1131 fname, __func__, NULL);
1132 tiffGetCount(fp, &npages);
1135 L_ERROR(
" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname);
1140 if ((fp = fopenReadStream(fname)) == NULL)
1141 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
1142 fname, __func__, NULL);
1143 getTiffResolution(fp, &xres, &yres);
1149 if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
1150 &w, &h, &minisblack)) {
1151 return (
L_COMP_DATA *)ERROR_PTR_1(
"datacomp not extracted",
1152 fname, __func__, NULL);
1156 if (ascii85flag == 1) {
1157 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1158 LEPT_FREE(datacomp);
1160 return (
L_COMP_DATA *)ERROR_PTR_1(
"data85 not made",
1161 fname, __func__, NULL);
1163 data85[nbytes85 - 1] =
'\0';
1167 if (ascii85flag == 0) {
1212l_int32 w, h, d, maxAsp;
1216 return ERROR_INT(
"&cid not defined", __func__, 1);
1219 return ERROR_INT(
"pixs not defined", __func__, 1);
1222 selectDefaultPdfEncoding(pixs, &type);
1224 if (ascii85 != 0 && ascii85 != 1)
1225 return ERROR_INT(
"invalid ascii85", __func__, 1);
1226 pixGetDimensions(pixs, &w, &h, NULL);
1227 if (w == 0 || h == 0)
1228 return ERROR_INT(
"invalid w or h", __func__, 1);
1229 maxAsp = L_MAX(w / h, h / w);
1231 return ERROR_INT(
"max asperity > 10", __func__, 1);
1235#if defined(HAVE_LIBZ)
1236# if !defined(HAVE_LIBJPEG)
1238 L_WARNING(
"no libjpeg; using flate encoding\n", __func__);
1242# if !defined(HAVE_LIBJP2K)
1244 L_WARNING(
"no libjp2k; using flate encoding\n", __func__);
1248# if !defined(HAVE_LIBTIFF)
1250 L_WARNING(
"no libtiff; using flate encoding\n", __func__);
1257 d = pixGetDepth(pixs);
1258 cmap = pixGetColormap(pixs);
1260 L_WARNING(
"pixs has cmap; using flate encoding\n", __func__);
1263 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
1266 L_WARNING(
"pixs has > 1 bpp; using flate encoding\n", __func__);
1272 return ERROR_INT(
"jpeg data not made", __func__, 1);
1275 return ERROR_INT(
"jp2k data not made", __func__, 1);
1278 return ERROR_INT(
"g4 data not made", __func__, 1);
1281 return ERROR_INT(
"flate data not made", __func__, 1);
1309 l_int32 ascii85flag)
1315 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1317 if ((pixs = pixRead(fname)) == NULL)
1318 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not made", __func__, NULL);
1344 l_int32 ascii85flag)
1346l_uint8 *data = NULL;
1347l_uint8 *datacomp = NULL;
1349l_uint8 *cmapdata = NULL;
1350char *cmapdata85 = NULL;
1351char *cmapdatahex = NULL;
1355l_int32 w, h, d, cmapflag;
1356size_t ncmapbytes85 = 0;
1358size_t nbytes, nbytescomp;
1364 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1371 pixGetDimensions(pixs, &w, &h, &d);
1372 cmap = pixGetColormap(pixs);
1373 cmapflag = (cmap) ? 1 : 0;
1374 if (d == 2 || d == 4 || d == 16) {
1375 pixt = pixConvertTo8(pixs, cmapflag);
1376 cmap = pixGetColormap(pixt);
1377 d = pixGetDepth(pixt);
1378 }
else if (d == 32 && pixGetSpp(pixs) == 4) {
1379 pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
1381 pixt = pixClone(pixs);
1384 return (
L_COMP_DATA *)ERROR_PTR(
"pixt not made", __func__, NULL);
1385 spp = (d == 32) ? 3 : 1;
1386 bps = (d == 32) ? 8 : d;
1391 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
1394 return (
L_COMP_DATA *)ERROR_PTR(
"cmapdata not made",
1398 cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
1399 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
1400 LEPT_FREE(cmapdata);
1404 pixGetRasterData(pixt, &data, &nbytes);
1407 LEPT_FREE(cmapdata85);
1408 LEPT_FREE(cmapdatahex);
1409 return (
L_COMP_DATA *)ERROR_PTR(
"data not returned", __func__, NULL);
1411 datacomp = zlibCompress(data, nbytes, &nbytescomp);
1414 LEPT_FREE(cmapdata85);
1415 LEPT_FREE(cmapdatahex);
1416 return (
L_COMP_DATA *)ERROR_PTR(
"datacomp not made", __func__, NULL);
1420 if (ascii85flag == 1) {
1421 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1422 LEPT_FREE(datacomp);
1424 LEPT_FREE(cmapdata85);
1425 LEPT_FREE(cmapdatahex);
1426 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
1428 data85[nbytes85 - 1] =
'\0';
1433 if (ascii85flag == 0) {
1448 cid->
res = pixGetXRes(pixs);
1472 l_int32 ascii85flag,
1480 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1481 if (pixGetColormap(pixs))
1482 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1483 d = pixGetDepth(pixs);
1484 if (d != 8 && d != 16 && d != 32)
1485 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 8, 16 or 32 bpp",
1489 fname = l_makeTempFilename();
1490 if (pixWriteJpeg(fname, pixs, quality, 0)) {
1497 if (lept_rmfile(fname) != 0)
1498 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1527 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1528 if (pixGetColormap(pixs))
1529 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1530 d = pixGetDepth(pixs);
1531 if (d != 8 && d != 32)
1532 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 8 or 32 bpp", __func__, NULL);
1535 fname = l_makeTempFilename();
1536 if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
1543 if (lept_rmfile(fname) != 0)
1544 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1566 l_int32 ascii85flag)
1572 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1573 if (pixGetDepth(pixs) != 1)
1574 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 1 bpp", __func__, NULL);
1575 if (pixGetColormap(pixs))
1576 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1579 fname = l_makeTempFilename();
1580 if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
1586 if (lept_rmfile(fname) != 0)
1587 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1618 if (!pdata || !pnbytes)
1619 return ERROR_INT(
"&data and &nbytes not both defined", __func__, 1);
1623 return ERROR_INT(
"cid not defined", __func__, 1);
1628 res = DefaultInputRes;
1629 wpt = cid->
w * 72.f / res;
1630 hpt = cid->
h * 72.f / res;
1633 if ((lpd = pdfdataCreate(title)) == NULL)
1634 return ERROR_INT(
"lpd not made", __func__, 1);
1635 ptraAdd(lpd->
cida, cid);
1637 ptaAddPt(lpd->
xy, 0, 0);
1638 ptaAddPt(lpd->
wh, wpt, hpt);
1642 pdfdataDestroy(&lpd);
1644 return ERROR_INT(
"pdf output not made", __func__, 1);
1661 L_WARNING(
"ptr address is null!\n", __func__);
1664 if ((cid = *pcid) == NULL)
1706 return ERROR_INT(
"&data not defined", __func__, 1);
1709 return ERROR_INT(
"&nbytes not defined", __func__, 1);
1712 return ERROR_INT(
"lpd not defined", __func__, 1);
1714 generateFixedStringsPdf(lpd);
1715 generateMediaboxPdf(lpd);
1716 generatePageStringPdf(lpd);
1717 generateContentStringPdf(lpd);
1718 generatePreXStringsPdf(lpd);
1719 generateColormapStringsPdf(lpd);
1720 generateTrailerPdf(lpd);
1728char buf[L_SMALLBUF];
1729char *version, *datestr;
1733 lpd->
id = stringNew(
"%PDF-1.5\n");
1734 l_dnaAddNumber(lpd->
objsize, strlen(lpd->
id));
1736 lpd->
obj1 = stringNew(
"1 0 obj\n"
1744 sa = sarrayCreate(0);
1745 sarrayAddString(sa,
"2 0 obj\n"
1747 if (var_WRITE_DATE_AND_VERSION) {
1748 datestr = l_getFormattedDate();
1749 snprintf(buf,
sizeof(buf),
"/CreationDate (D:%s)\n", datestr);
1750 sarrayAddString(sa, buf,
L_COPY);
1752 version = getLeptonicaVersion();
1753 snprintf(buf,
sizeof(buf),
1754 "/Producer (leptonica: %s)\n", version);
1757 snprintf(buf,
sizeof(buf),
"/Producer (leptonica)\n");
1759 sarrayAddString(sa, buf,
L_COPY);
1763 snprintf(buf,
sizeof(buf),
"/Title %s\n", hexstr);
1764 sarrayAddString(sa, buf,
L_COPY);
1766 L_ERROR(
"title string is not ascii\n", __func__);
1770 sarrayAddString(sa,
">>\n"
1772 lpd->
obj2 = sarrayToString(sa, 0);
1776 lpd->
obj3 = stringNew(
"3 0 obj\n"
1813l_int32 i, nchar, buflen;
1816 return (
char *)ERROR_PTR(
"str not defined", __func__, NULL);
1817 nchar = strlen(str);
1818 for (i = 0; i < nchar; i++) {
1820 return (
char *)ERROR_PTR(
"str not all ascii", __func__, NULL);
1823 buflen = 4 * nchar + 10;
1824 buffer = (
char *)LEPT_CALLOC(buflen,
sizeof(
char));
1825 stringCat(buffer, buflen,
"<feff");
1826 for (i = 0; i < nchar; i++) {
1827 snprintf(smallbuf,
sizeof(smallbuf),
"%04x", str[i]);
1828 stringCat(buffer, buflen, smallbuf);
1830 stringCat(buffer, buflen,
">");
1839l_float32 xpt, ypt, wpt, hpt, maxx, maxy;
1844 for (i = 0; i < lpd->
n; i++) {
1845 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1846 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1847 maxx = L_MAX(maxx, xpt + wpt);
1848 maxy = L_MAX(maxy, ypt + hpt);
1851 lpd->
mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),
1852 (l_int32)(maxy + 0.5));
1858 for (i = 0; i < lpd->
n; i++) {
1859 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1860 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1861 ptaSetPt(lpd->
xy, i, xpt, maxy - ypt - hpt);
1871l_int32 bufsize, i, wpt, hpt;
1877 bufsize = 1000 + 50 * lpd->
n;
1878 if ((buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char))) == NULL)
1879 return ERROR_INT(
"calloc fail for buf", __func__, 1);
1881 boxGetGeometry(lpd->
mediabox, NULL, NULL, &wpt, &hpt);
1882 sa = sarrayCreate(lpd->
n);
1883 for (i = 0; i < lpd->
n; i++) {
1884 snprintf(buf, bufsize,
"/Im%d %d 0 R ", i + 1, 6 + i);
1885 sarrayAddString(sa, buf,
L_COPY);
1887 xstr = sarrayToString(sa, 0);
1891 return ERROR_INT(
"xstr not made", __func__, 1);
1894 snprintf(buf, bufsize,
"4 0 obj\n"
1898 "/MediaBox [%d %d %d %d]\n"
1902 "/XObject << %s >>\n"
1903 "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
1907 0, 0, wpt, hpt, xstr);
1909 lpd->
obj4 = stringNew(buf);
1924l_float32 xpt, ypt, wpt, hpt;
1927 bufsize = 1000 + 200 * lpd->
n;
1928 if ((buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char))) == NULL)
1929 return ERROR_INT(
"calloc fail for buf", __func__, 1);
1931 sa = sarrayCreate(lpd->
n);
1932 for (i = 0; i < lpd->
n; i++) {
1933 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1934 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1935 snprintf(buf, bufsize,
1936 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
1937 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
1938 sarrayAddString(sa, buf,
L_COPY);
1940 cstr = sarrayToString(sa, 0);
1944 return ERROR_INT(
"cstr not made", __func__, 1);
1947 snprintf(buf, bufsize,
"5 0 obj\n"
1948 "<< /Length %d >>\n"
1953 (l_int32)strlen(cstr), cstr);
1955 lpd->
obj5 = stringNew(buf);
1969char *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;
1975 cmindex = 6 + lpd->
n;
1976 for (i = 0; i < lpd->
n; i++) {
1978 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
1979 return ERROR_INT(
"cid not found", __func__, 1);
1982 if (var_WRITE_G4_IMAGE_MASK) {
1983 cstr = stringNew(
"/ImageMask true\n"
1984 "/ColorSpace /DeviceGray");
1986 cstr = stringNew(
"/ColorSpace /DeviceGray");
1988 bstr = stringNew(
"/BitsPerComponent 1\n"
1989 "/Interpolate true");
1996 photometry = (cid->
minisblack) ? stringNew(
"true")
1997 : stringNew(
"false");
1998 snprintf(buff,
sizeof(buff),
1999 "/Filter /CCITTFaxDecode\n"
2005 ">>", photometry, cid->
w);
2006 fstr = stringNew(buff);
2007 LEPT_FREE(photometry);
2010 cstr = stringNew(
"/ColorSpace /DeviceGray");
2011 else if (cid->
spp == 3)
2012 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2013 else if (cid->
spp == 4)
2014 cstr = stringNew(
"/ColorSpace /DeviceCMYK");
2016 L_ERROR(
"in jpeg: spp != 1, 3 or 4\n", __func__);
2017 bstr = stringNew(
"/BitsPerComponent 8");
2018 fstr = stringNew(
"/Filter /DCTDecode");
2021 cstr = stringNew(
"/ColorSpace /DeviceGray");
2022 else if (cid->
spp == 3)
2023 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2025 L_ERROR(
"in jp2k: spp != 1 && spp != 3\n", __func__);
2026 bstr = stringNew(
"/BitsPerComponent 8");
2027 fstr = stringNew(
"/Filter /JPXDecode");
2030 snprintf(buff,
sizeof(buff),
"/ColorSpace %d 0 R", cmindex++);
2031 cstr = stringNew(buff);
2033 if (cid->
spp == 1 && cid->
bps == 1)
2034 cstr = stringNew(
"/ColorSpace /DeviceGray\n"
2036 else if (cid->
spp == 1)
2037 cstr = stringNew(
"/ColorSpace /DeviceGray");
2038 else if (cid->
spp == 3)
2039 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2041 L_ERROR(
"unknown colorspace: spp = %d\n",
2042 __func__, cid->
spp);
2044 snprintf(buff,
sizeof(buff),
"/BitsPerComponent %d", cid->
bps);
2045 bstr = stringNew(buff);
2046 fstr = stringNew(
"/Filter /FlateDecode");
2048 snprintf(buff,
sizeof(buff),
2054 " /BitsPerComponent %d\n"
2055 ">>\n", cid->
w, cid->
spp, cid->
bps);
2056 pstr = stringNew(buff);
2060 pstr = stringNew(
"");
2062 snprintf(buf,
sizeof(buf),
2076 cid->
w, cid->
h, bstr, fstr, pstr);
2077 xstr = stringNew(buf);
2078 sarrayAddString(sa, xstr,
L_INSERT);
2096l_int32 i, cmindex, ncmap;
2104 cmindex = 6 + lpd->
n;
2106 for (i = 0; i < lpd->
n; i++) {
2107 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
2108 return ERROR_INT(
"cid not found", __func__, 1);
2109 if (cid->
ncolors == 0)
continue;
2112 snprintf(buf,
sizeof(buf),
"%d 0 obj\n"
2113 "[ /Indexed /DeviceRGB\n"
2120 cmstr = stringNew(buf);
2121 l_dnaAddNumber(lpd->
objsize, strlen(cmstr));
2122 sarrayAddString(sa, cmstr,
L_INSERT);
2133l_int32 i, n, size, linestart;
2134L_DNA *daloc, *dasize;
2157 l_dnaAddNumber(daloc, linestart);
2158 n = l_dnaGetCount(dasize);
2159 for (i = 0; i < n; i++) {
2160 l_dnaGetIValue(dasize, i, &size);
2162 l_dnaAddNumber(daloc, linestart);
2164 l_dnaGetIValue(daloc, n, &lpd->
xrefloc);
2167 lpd->
trailer = makeTrailerStringPdf(daloc);
2172makeTrailerStringPdf(
L_DNA *daloc)
2176l_int32 i, n, linestart, xrefloc;
2180 return (
char *)ERROR_PTR(
"daloc not defined", __func__, NULL);
2181 n = l_dnaGetCount(daloc) - 1;
2183 sa = sarrayCreate(0);
2184 snprintf(buf,
sizeof(buf),
"xref\n"
2186 "0000000000 65535 f \n", n);
2187 sarrayAddString(sa, buf,
L_COPY);
2188 for (i = 1; i < n; i++) {
2189 l_dnaGetIValue(daloc, i, &linestart);
2190 snprintf(buf,
sizeof(buf),
"%010d 00000 n \n", linestart);
2191 sarrayAddString(sa, buf,
L_COPY);
2194 l_dnaGetIValue(daloc, n, &xrefloc);
2195 snprintf(buf,
sizeof(buf),
"trailer\n"
2203 "%%%%EOF\n", n, xrefloc);
2204 sarrayAddString(sa, buf,
L_COPY);
2205 outstr = sarrayToString(sa, 0);
2231l_int32 nimages, i, len;
2232l_int32 *sizes, *locs;
2237 return ERROR_INT(
"&data not defined", __func__, 1);
2240 return ERROR_INT(
"&nbytes not defined", __func__, 1);
2243 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes,
sizeof(l_uint8))) == NULL)
2244 return ERROR_INT(
"calloc fail for data", __func__, 1);
2247 sizes = l_dnaGetIArray(lpd->
objsize);
2248 locs = l_dnaGetIArray(lpd->
objloc);
2249 memcpy(data, lpd->
id, sizes[0]);
2250 memcpy(data + locs[1], lpd->
obj1, sizes[1]);
2251 memcpy(data + locs[2], lpd->
obj2, sizes[2]);
2252 memcpy(data + locs[3], lpd->
obj3, sizes[3]);
2253 memcpy(data + locs[4], lpd->
obj4, sizes[4]);
2254 memcpy(data + locs[5], lpd->
obj5, sizes[5]);
2259 for (i = 0; i < nimages; i++) {
2260 if ((cid = pdfdataGetCid(lpd, i)) == NULL) {
2263 return ERROR_INT(
"cid not found", __func__, 1);
2267 memcpy(data + locs[6 + i], str, len);
2268 memcpy(data + locs[6 + i] + len,
2270 memcpy(data + locs[6 + i] + len + cid->
nbytescomp,
2275 for (i = 0; i < lpd->
ncmap; i++) {
2277 memcpy(data + locs[6 + nimages + i], str, strlen(str));
2305l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
2307L_DNA *da, *daobj, *daxref;
2311 return ERROR_INT(
"&da not defined", __func__, 1);
2314 return ERROR_INT(
"bas not defined", __func__, 1);
2315 data = l_byteaGetData(bas, &size);
2316 if (memcmp(data,
"%PDF-1.", 7) != 0)
2317 return ERROR_INT(
"PDF header signature not found", __func__, 1);
2323 arrayFindSequence(data + start, size - start,
2324 (l_uint8 *)
"startxref\n", 10, &loc, &found);
2326 return ERROR_INT(
"startxref not found!", __func__, 1);
2327 if (sscanf((
char *)(data + start + loc + 10),
"%d\n", &xrefloc) != 1)
2328 return ERROR_INT(
"xrefloc not found!", __func__, 1);
2329 if (xrefloc < 0 || xrefloc >= size)
2330 return ERROR_INT(
"invalid xrefloc!", __func__, 1);
2331 sa = sarrayCreateLinesFromString((
char *)(data + xrefloc), 0);
2332 str = sarrayGetString(sa, 1,
L_NOCOPY);
2333 if ((sscanf(str,
"0 %d", &nobj)) != 1) {
2335 return ERROR_INT(
"nobj not found", __func__, 1);
2340 da = l_dnaCreate(nobj + 1);
2342 for (i = 0; i < nobj; i++) {
2343 str = sarrayGetString(sa, i + 2,
L_NOCOPY);
2344 sscanf(str,
"%d", &startloc);
2345 l_dnaAddNumber(da, startloc);
2347 l_dnaAddNumber(da, xrefloc);
2350 lept_stderr(
"************** Trailer string ************\n");
2351 lept_stderr(
"xrefloc = %d", xrefloc);
2352 sarrayWriteStderr(sa);
2354 lept_stderr(
"************** Object locations ************");
2355 l_dnaWriteStderr(da);
2361 for (i = 1; i < nobj; i++) {
2362 l_dnaGetIValue(da, i, &startloc);
2363 if ((sscanf((
char *)(data + startloc),
"%d 0 obj", &objno)) != 1) {
2364 L_ERROR(
"bad trailer for object %d\n", __func__, i);
2372 L_INFO(
"rebuilding pdf trailer\n", __func__);
2374 l_dnaAddNumber(da, 0);
2375 l_byteaFindEachSequence(bas, (l_uint8 *)
" 0 obj\n", 7, &daobj);
2376 nobj = l_dnaGetCount(daobj);
2377 for (i = 0; i < nobj; i++) {
2378 l_dnaGetIValue(daobj, i, &loc);
2379 for (j = loc - 1; j > 0; j--) {
2383 l_dnaAddNumber(da, j + 1);
2385 l_byteaFindEachSequence(bas, (l_uint8 *)
"xref", 4, &daxref);
2386 l_dnaGetIValue(daxref, 0, &loc);
2387 l_dnaAddNumber(da, loc);
2388 l_dnaDestroy(&daobj);
2389 l_dnaDestroy(&daxref);
2397generatePagesObjStringPdf(
NUMA *napage)
2401l_int32 i, n, index, bufsize;
2405 return (
char *)ERROR_PTR(
"napage not defined", __func__, NULL);
2407 n = numaGetCount(napage);
2408 bufsize = 100 + 16 * n;
2409 buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
2410 sa = sarrayCreate(n);
2411 for (i = 0; i < n; i++) {
2412 numaGetIValue(napage, i, &index);
2413 snprintf(buf, bufsize,
" %d 0 R ", index);
2414 sarrayAddString(sa, buf,
L_COPY);
2417 str = sarrayToString(sa, 0);
2418 snprintf(buf, bufsize - 1,
"3 0 obj\n"
2456l_int32 start, nrepl, i, j, nobjs, objin, objout, found;
2457l_int32 *objs, *matches;
2463 return (
L_BYTEA *)ERROR_PTR(
"bas not defined", __func__, NULL);
2465 return (
L_BYTEA *)ERROR_PTR(
"na_objs not defined", __func__, NULL);
2467 datas = l_byteaGetData(bas, &size);
2468 bad = l_byteaCreate(100);
2469 objs = numaGetIArray(na_objs);
2470 nobjs = numaGetCount(na_objs);
2473 sscanf((
char *)datas,
"%d", &objin);
2474 if (objin < 0 || objin >= nobjs) {
2475 L_ERROR(
"index %d into array of size %d\n", __func__, objin, nobjs);
2479 objout = objs[objin];
2480 snprintf((
char *)buf, 32,
"%d", objout);
2481 l_byteaAppendString(bad, (
char *)buf);
2484 arrayFindSequence(datas, size, &space, 1, &start, &found);
2485 da_match = arrayFindEachSequence(datas, size, (l_uint8 *)
" 0 R", 4);
2487 l_byteaAppendData(bad, datas + start, size - start);
2493 nrepl = l_dnaGetCount(da_match);
2494 matches = l_dnaGetIArray(da_match);
2495 for (i = 0; i < nrepl; i++) {
2497 for (j = matches[i] - 1; j > 0; j--) {
2498 if (datas[j] == space)
2502 l_byteaAppendData(bad, datas + start, j - start + 1);
2503 sscanf((
char *)(datas + j + 1),
"%d", &objin);
2504 if (objin < 0 || objin >= nobjs) {
2505 L_ERROR(
"index %d into array of size %d\n", __func__, objin, nobjs);
2508 l_dnaDestroy(&da_match);
2511 objout = objs[objin];
2512 snprintf((
char *)buf, 32,
"%d", objout);
2513 l_byteaAppendString(bad, (
char *)buf);
2516 l_byteaAppendData(bad, datas + start, size - start);
2520 l_dnaDestroy(&da_match);
/*
 * Fragment of pdfdataCreate() (extracted listing; the allocation of lpd
 * itself and the return statement are not visible).
 *
 * Visible behavior: stores a copy of title when one is given, and creates
 * the container fields of the L_PDF_DATA, each with the initial size
 * passed below.
 */
2529pdfdataCreate(
const char *title)
/* A NULL title is allowed; in that case this line does not set lpd->title. */
2534 if (title) lpd->
title = stringNew(title);
2535 lpd->
cida = ptraCreate(10);
2536 lpd->
xy = ptaCreate(10);
2537 lpd->
wh = ptaCreate(10);
2538 lpd->
saprex = sarrayCreate(10);
2539 lpd->
sacmap = sarrayCreate(10);
2540 lpd->
objsize = l_dnaCreate(20);
2541 lpd->
objloc = l_dnaCreate(20);
/*
 * Fragment (extracted listing; the header and several lines are not
 * visible).  Presumably pdfdataDestroy(), since it takes the address of
 * an L_PDF_DATA pointer (plpd) -- confirm against the full source.
 *
 * Visible behavior: warns when the pointer address is null, tests *plpd
 * for NULL, iterates over the lpd->n entries, and releases the fields
 * shown below.
 */
2553 L_WARNING(
"ptr address is null!\n", __func__);
2556 if ((lpd = *plpd) == NULL)
/* The loop body is not visible in this fragment. */
2560 for (i = 0; i < lpd->
n; i++) {
2565 ptraDestroy(&lpd->
cida, 0, 0);
/* Each remaining field is released only when it is non-NULL. */
2566 if (lpd->
id) LEPT_FREE(lpd->
id);
2567 if (lpd->
obj1) LEPT_FREE(lpd->
obj1);
2568 if (lpd->
obj2) LEPT_FREE(lpd->
obj2);
2569 if (lpd->
obj3) LEPT_FREE(lpd->
obj3);
2570 if (lpd->
obj4) LEPT_FREE(lpd->
obj4);
2571 if (lpd->
obj5) LEPT_FREE(lpd->
obj5);
2574 if (lpd->
xy) ptaDestroy(&lpd->
xy);
2575 if (lpd->
wh) ptaDestroy(&lpd->
wh);
/*
 * Fragment of an accessor that returns an L_COMP_DATA pointer for a given
 * lpd and index (extracted listing; the function name and the successful
 * return are not visible here -- TODO confirm the name against the full
 * source).
 *
 * Visible behavior: reports an error and returns NULL when lpd is not
 * defined or when index lies outside [0, lpd->n).
 */
2591 return (
L_COMP_DATA *)ERROR_PTR(
"lpd not defined", __func__, NULL);
2592 if (index < 0 || index >= lpd->
n)
2593 return (
L_COMP_DATA *)ERROR_PTR(
"invalid image index", __func__, NULL);
/*
 * Fragment of getPdfPageCount() (extracted listing; the header, braces
 * and several conditions are not visible).
 *
 * Visible behavior: requires fname to be identified as IFF_LPDF, searches
 * the file data for the "/Count" key, and parses the integer that follows
 * it into npages.  Returns 1 through ERROR_INT on the failures shown.
 */
2625l_int32 format, loc, ret, npages, found;
2629 return ERROR_INT(
"&npages not defined", __func__, 1);
2632 return ERROR_INT(
"fname not defined", __func__, 1);
2635 findFileFormat(fname, &format);
2636 if (format != IFF_LPDF)
2637 return ERROR_INT(
"file is not pdf", __func__, 1);
/* First attempt: a partial read, called with (0, 10000) -- presumably
 * the start offset and the number of bytes; verify against the helper. */
2640 if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))
2642 return ERROR_INT(
"partial data not read", __func__, 1);
2647 arrayFindSequence(data, nread, (
const l_uint8 *)
"/Count",
2648 strlen(
"/Count"), &loc, &found);
/* Second attempt: read the entire file and search again.  The condition
 * that selects this path is not visible in this fragment. */
2650 lept_stderr(
"Reading entire file looking for '/Count'\n");
2652 if ((data = l_binaryRead(fname, &nread)) == NULL)
2653 return ERROR_INT(
"full data not read", __func__, 1);
2654 arrayFindSequence(data, nread, (
const l_uint8 *)
"/Count",
2655 strlen(
"/Count"), &loc, &found);
2658 L_WARNING(
"/Count not found\n", __func__);
/* Require at least 12 bytes from the key to the end of the data that
 * was read before parsing the number. */
2664 if (nread - loc < 12) {
2666 return ERROR_INT(
"data may not include page count field", __func__, 1);
2670 ret = sscanf((
char *)&data[loc],
"/Count %d", &npages);
2673 return ERROR_INT(
"npages not found", __func__, 1);
/*
 * Fragment of getPdfPageSizes() (extracted listing; the header, braces
 * and several conditions are not visible).
 *
 * Visible behavior: reads the whole pdf file, locates every "/Width" and
 * "/Height" key, parses the integer after each one into the arrays naw
 * and nah, and stores the rounded medians in *pmedw and *pmedh.
 */
2712l_int32 i, nw, nh, format, ret, loc, width, height;
/* Initialize every requested output before any early return. */
2720 if (pnaw) *pnaw = NULL;
2721 if (pnah) *pnah = NULL;
2722 if (pmedw) *pmedw = 0;
2723 if (pmedh) *pmedh = 0;
/* At least one of the four outputs must be requested. */
2724 if (!pnaw && !pnah && !pmedw && !pmedh)
2725 return ERROR_INT(
"no output requested", __func__, 1);
2727 return ERROR_INT(
"fname not defined", __func__, 1);
2730 findFileFormat(fname, &format);
2731 if (format != IFF_LPDF)
2732 return ERROR_INT(
"file is not pdf", __func__, 1);
/* Read the entire file, then collect the offset of every "/Width" and
 * "/Height" key. */
2736 if ((data = l_binaryRead(fname, &nread)) == NULL)
2737 return ERROR_INT(
"full data not read", __func__, 1);
2738 dnaw = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/Width",
2740 dnah = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/Height",
2743 L_WARNING(
"unable to find widths\n", __func__);
2745 L_WARNING(
"unable to find heights\n", __func__);
2746 if (!dnaw && !dnah) {
2748 L_WARNING(
"no fields found\n", __func__);
/* Parse the integer that follows each "/Width" key. */
2753 nw = l_dnaGetCount(dnaw);
2754 naw = numaCreate(nw);
2755 for (i = 0; i < nw; i++) {
2756 l_dnaGetIValue(dnaw, i, &loc);
2757 ret = sscanf((
char *)&data[loc],
"/Width %d", &width);
2759 L_ERROR(
"width not found for item %d at loc %d\n",
2763 numaAddNumber(naw, width);
/* Parse the integer that follows each "/Height" key. */
2765 nh = l_dnaGetCount(dnah);
2766 nah = numaCreate(nh);
2767 for (i = 0; i < nh; i++) {
2768 l_dnaGetIValue(dnah, i, &loc);
2769 ret = sscanf((
char *)&data[loc],
"/Height %d", &height);
2771 L_ERROR(
"height not found for item %d at loc %d\n",
2775 numaAddNumber(nah, height);
2779 l_dnaDestroy(&dnaw);
2780 l_dnaDestroy(&dnah);
/* Medians are rounded to the nearest integer. */
2782 numaGetMedian(naw, &fval);
2783 *pmedw = lept_roundftoi(fval);
2790 numaGetMedian(nah, &fval);
2791 *pmedh = lept_roundftoi(fval);
/*
 * Fragment of getPdfMediaBoxSizes() (extracted listing; the header,
 * braces and several conditions are not visible).
 *
 * Visible behavior: reads the whole pdf file, locates every "/MediaBox"
 * key, parses four floats after each one, stores the third and fourth
 * (w, h) in naw and nah, and stores the rounded medians in *pmedw and
 * *pmedh.
 */
2835l_int32 i, n, format, ret, loc;
2836l_float32 fval, ignore1, ignore2, w, h;
/* Initialize every requested output before any early return. */
2842 if (pnaw) *pnaw = NULL;
2843 if (pnah) *pnah = NULL;
2844 if (pmedw) *pmedw = 0;
2845 if (pmedh) *pmedh = 0;
/* At least one of the four outputs must be requested. */
2846 if (!pnaw && !pnah && !pmedw && !pmedh)
2847 return ERROR_INT(
"no output requested", __func__, 1);
2849 return ERROR_INT(
"fname not defined", __func__, 1);
2852 findFileFormat(fname, &format);
2853 if (format != IFF_LPDF)
2854 return ERROR_INT(
"file is not pdf", __func__, 1);
2857 if ((data = l_binaryRead(fname, &nread)) == NULL)
2858 return ERROR_INT(
"full data not read", __func__, 1);
2859 dna = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/MediaBox",
2860 strlen(
"/MediaBox"));
2863 L_WARNING(
"no mediaboxes found\n", __func__);
2868 n = l_dnaGetCount(dna);
2869 naw = numaCreate(n);
2870 nah = numaCreate(n);
2871 for (i = 0; i < n; i++) {
2872 l_dnaGetIValue(dna, i, &loc);
/* The first two parsed values are discarded; only the last two are kept. */
2873 ret = sscanf((
char *)&data[loc],
"/MediaBox [ %f %f %f %f",
2874 &ignore1, &ignore2, &w, &h);
2876 L_ERROR(
"mediabox sizes not found for item %d at loc %d\n",
2880 numaAddNumber(naw, w);
2881 numaAddNumber(nah, h);
/* Medians are rounded to integers; a value above 850 is reported on stderr. */
2887 numaGetMedian(naw, &fval);
2888 *pmedw = lept_roundftoi(fval);
2889 if (*pmedw > 850) lept_stderr(
"oversize width: %d\n", *pmedw);
2896 numaGetMedian(nah, &fval);
2897 *pmedh = lept_roundftoi(fval);
2898 if (*pmedh > 850) lept_stderr(
"oversize height: %d\n", *pmedh);
/*
 * Fragment of getPdfRendererResolution() (extracted listing; the header,
 * braces and several conditions are not visible).
 *
 * Visible behavior: chooses a rendering resolution.  One path derives it
 * from the median media box size; the other renders a single page with
 * the external pdftoppm program at 72 ppi into outdir and derives it from
 * the rendered image dimensions.  How the two paths are selected is not
 * visible in this fragment.
 */
2945char *tail, *basename, *fname;
2946l_int32 ret, res, medw, medh, medmax, npages, pageno, w, h;
2950 return ERROR_INT(
"&res not defined", __func__, 1);
2954 L_INFO(
"Requires pdftoppm, so this is disabled on windows.\n"
2955 "Returns default resolution 300 ppi", __func__);
2960 L_INFO(
"Running pdftoppm is disabled; "
2961 "use setLeptDebugOK(1) to enable\n"
2962 "returns default resolution 300 ppi\n", __func__);
2967 return ERROR_INT(
"infile not defined", __func__, 1);
2969 return ERROR_INT(
"outdir not defined", __func__, 1);
2974 lept_stderr(
"Media Box medians: medw = %d, medh = %d\n", medw, medh);
2975 medmax = L_MAX(medw, medh);
/* Scale 300 by the ratio 792 / medmax; the float product is truncated
 * when it is assigned to the integer res. */
2977 res = 300 * ((l_float32)792 / (l_float32)medmax);
2978 lept_stderr(
" Oversize media box; use resolution = %d\n", res);
2985 lept_stderr(
"Media Box dimensions not found\n");
/* Pick the middle page when the page count is known, otherwise page 1. */
2987 pageno = (npages > 0) ? (npages + 1) / 2 : 1;
2988 splitPathAtDirectory(infile, NULL, &tail);
2989 splitPathAtExtension(tail, &basename, NULL);
/* NOTE(review): infile, outdir and basename are inserted unquoted into
 * the command line; names containing spaces or shell metacharacters
 * would not be passed through intact if callSystemDebug hands this
 * string to a shell -- confirm. */
2990 snprintf(buf,
sizeof(buf),
"pdftoppm -f %d -l %d -r 72 %s %s/%s",
2991 pageno, pageno, infile, outdir, basename);
2993 LEPT_FREE(basename);
2994 callSystemDebug(buf);
/* Read the header of the first pathname in the sorted listing of outdir. */
2997 sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);
2998 fname = sarrayGetString(sa, 0,
L_NOCOPY);
2999 pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);
/* Resolution is 72 * 3300 / max(w, h) in integer arithmetic, capped at 600. */
3001 if (w > 0 && h > 0) {
3002 res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);
3004 lept_stderr(
"Use resolution = %d\n", res);
3006 L_ERROR(
"page size not found; assuming res = 300\n", __func__);
/* Only visible line of a setter, presumably l_pdfSetG4ImageMask(): stores
 * flag in the file-scope variable var_WRITE_G4_IMAGE_MASK. */
3033 var_WRITE_G4_IMAGE_MASK = flag;
/* Only visible line of a setter, presumably l_pdfSetDateAndVersion():
 * stores flag in the file-scope variable var_WRITE_DATE_AND_VERSION. */
3053 var_WRITE_DATE_AND_VERSION = flag;
void l_CIDataDestroy(L_COMP_DATA **pcid)
l_CIDataDestroy()
L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
l_generateJpegDataMem()
l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
pixGenerateCIData()
l_ok getPdfRendererResolution(const char *infile, const char *outdir, l_int32 *pres)
getPdfRendererResolution()
static L_COMP_DATA * pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag)
pixGenerateFlateData()
L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag)
l_generateFlateData()
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag)
l_generateJpegData()
static char * generateEscapeString(const char *str)
generateEscapeString()
L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag)
l_generateG4Data()
static L_COMP_DATA * pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, l_int32 quality)
pixGenerateJpegData()
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
static L_BYTEA * substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs)
substituteObjectNumbers()
l_ok getPdfMediaBoxSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfMediaBoxSizes()
l_ok getPdfPageCount(const char *fname, l_int32 *pnpages)
getPdfPageCount()
l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout)
convertTiffMultipageToPdf()
l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
l_generateCIData()
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda)
parseTrailerPdf()
static L_COMP_DATA * pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag)
pixGenerateG4Data()
l_ok getPdfPageSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfPageSizes()
void l_pdfSetG4ImageMask(l_int32 flag)
l_pdfSetG4ImageMask()
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
static L_COMP_DATA * l_generateJp2kData(const char *fname)
l_generateJp2kData()
L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pixs)
l_generateFlateDataPdf()
static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
l_generatePdf()
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
generateOutputDataPdf()
static L_COMP_DATA * pixGenerateJp2kData(PIX *pixs, l_int32 quality)
pixGenerateJp2kData()