106#include <config_auto.h>
111#include "allheaders.h"
/* Fallback input resolution.  It is substituted for an image's own
 * resolution when that value is not positive, and it is used directly
 * when a page is sized from a compressed-data struct's w and h.
 * In both places a pixel count is scaled by 72.f / res.
 * NOTE(review): that scaling suggests pixels-per-inch converted to
 * points -- confirm against the callers. */
118static const l_int32 DefaultInputRes = 300;
/* Forward declarations of file-local (static) helpers.
 *
 * The seven helpers that take only an L_PDF_DATA are invoked on one lpd,
 * in exactly the order declared here, by the routine that assembles the
 * pdf.  At that call site the l_int32 results of the four int-returning
 * helpers are not checked.
 *
 * NOTE(review): the l_generatePdf() prototype below is cut off in this
 * excerpt (its remaining parameters are not visible). */
128static l_int32
l_generatePdf(l_uint8 **pdata,
size_t *pnbytes,
130static void generateFixedStringsPdf(
L_PDF_DATA *lpd);
132static void generateMediaboxPdf(
L_PDF_DATA *lpd);
133static l_int32 generatePageStringPdf(
L_PDF_DATA *lpd);
134static l_int32 generateContentStringPdf(
L_PDF_DATA *lpd);
135static l_int32 generatePreXStringsPdf(
L_PDF_DATA *lpd);
136static l_int32 generateColormapStringsPdf(
L_PDF_DATA *lpd);
137static void generateTrailerPdf(
L_PDF_DATA *lpd);
/* Formats the "xref" table and "trailer" text from an array of byte
 * locations; the last array entry is read as the xref location. */
138static char *makeTrailerStringPdf(
L_DNA *daloc);
/* Formats one " %d 0 R " reference per entry of napage for the text that
 * begins "3 0 obj". */
143static char *generatePagesObjStringPdf(
NUMA *napage);
/* Sets up the containers held by an L_PDF_DATA; title is copied only
 * when it is non-NULL. */
146static L_PDF_DATA *pdfdataCreate(
const char *title);
/* When nonzero, the image dictionary string built next to the
 * "/Filter /CCITTFaxDecode" entry is "/ImageMask true" followed by
 * "/ColorSpace /DeviceGray"; when zero only the colorspace entry is used. */
153static l_int32 var_WRITE_G4_IMAGE_MASK = 1;
/* When nonzero, the info object gets a "/CreationDate (D:...)" entry from
 * l_getFormattedDate() and a "/Producer (leptonica: <version>)" entry from
 * getLeptonicaVersion(); otherwise the producer is the fixed string
 * "/Producer (leptonica)". */
155static l_int32 var_WRITE_DATE_AND_VERSION = 1;
/* Size in bytes of the stack char buffer declared as buf[L_SMALLBUF]. */
157#define L_SMALLBUF 256
/* NOTE(review): no use of this macro is visible in this excerpt; it
 * presumably gates the stderr dumps of the multipage code -- confirm. */
162#define DEBUG_MULTIPAGE 0
213l_int32 pixres, w, h, ret;
214l_float32 xpt, ypt, wpt, hpt;
219 return ERROR_INT(
"&data not defined", __func__, 1);
222 return ERROR_INT(
"&nbytes not defined", __func__, 1);
225 return ERROR_INT(
"pix not defined", __func__, 1);
228 selectDefaultPdfEncoding(pix, &type);
230 if (quality < 0 || quality > 100)
231 return ERROR_INT(
"invalid quality", __func__, 1);
242 return ERROR_INT(
"cid not made", __func__, 1);
251 res = (pixres > 0) ? pixres : DefaultInputRes;
252 xpt = x * 72.f / res;
253 ypt = y * 72.f / res;
254 wpt = w * 72.f / res;
255 hpt = h * 72.f / res;
259 if ((lpd = pdfdataCreate(title)) == NULL)
260 return ERROR_INT(
"lpd not made", __func__, 1);
262 if ((lpd = pdfdataCreate(title)) == NULL)
263 return ERROR_INT(
"lpd not made", __func__, 1);
270 ptraAdd(lpd->
cida, cid);
272 ptaAddPt(lpd->
xy, xpt, ypt);
273 ptaAddPt(lpd->
wh, wpt, hpt);
279 pdfdataDestroy(&lpd);
280 if (plpd) *plpd = NULL;
282 return ERROR_INT(
"pdf output not made", __func__, 1);
334char *fname, *str_pages, *str_trailer;
335l_uint8 *pdfdata, *data;
336l_int32 i, j, index, nobj, npages;
337l_int32 *sizes, *locs;
339L_BYTEA *bas, *bad, *bat1, *bat2;
340L_DNA *da_locs, *da_sizes, *da_outlocs, *da;
342NUMA *na_objs, *napage;
346 return ERROR_INT(
"&data not defined", __func__, 1);
349 return ERROR_INT(
"&nbytes not defined", __func__, 1);
352 return ERROR_INT(
"pa_data not defined", __func__, 1);
356 ptraGetActualCount(pa_data, &npages);
357 daa_locs = l_dnaaCreate(npages);
358 for (i = 0; i < npages; i++) {
359 bas = (
L_BYTEA *)ptraGetPtrToItem(pa_data, i);
362 l_byteaDestroy(&bas);
364 fname = sarrayGetString(sa, i,
L_NOCOPY);
365 L_ERROR(
"can't parse file %s; skipping\n", __func__, fname);
367 L_ERROR(
"can't parse file %d; skipping\n", __func__, i);
370 l_dnaaAddDna(daa_locs, da_locs,
L_INSERT);
375 ptraCompactArray(pa_data);
376 ptraGetActualCount(pa_data, &npages);
378 l_dnaaDestroy(&daa_locs);
379 return ERROR_INT(
"no parsable pdf files found", __func__, 1);
383 naa_objs = numaaCreate(npages);
384 napage = numaCreate(npages);
386 for (i = 0; i < npages; i++) {
387 da = l_dnaaGetDna(daa_locs, i,
L_CLONE);
388 nobj = l_dnaGetCount(da);
390 numaAddNumber(napage, 4);
391 na_objs = numaMakeSequence(0.0, 1.0, nobj - 1);
394 numaAddNumber(napage, index);
395 na_objs = numaMakeConstant(0.0, nobj - 1);
396 numaReplaceNumber(na_objs, 3, 3);
397 for (j = 4; j < nobj - 1; j++)
398 numaSetValue(na_objs, j, index++);
400 numaaAddNuma(naa_objs, na_objs,
L_INSERT);
405 str_pages = generatePagesObjStringPdf(napage);
408 bad = l_byteaCreate(5000);
409 da_outlocs = l_dnaCreate(0);
410 for (i = 0; i < npages; i++) {
411 bas = (
L_BYTEA *)ptraGetPtrToItem(pa_data, i);
412 pdfdata = l_byteaGetData(bas, &size);
413 da_locs = l_dnaaGetDna(daa_locs, i,
L_CLONE);
414 na_objs = numaaGetNuma(naa_objs, i,
L_CLONE);
415 nobj = l_dnaGetCount(da_locs) - 1;
416 da_sizes = l_dnaDiffAdjValues(da_locs);
417 sizes = l_dnaGetIArray(da_sizes);
418 locs = l_dnaGetIArray(da_locs);
420 l_byteaAppendData(bad, pdfdata, sizes[0]);
421 l_byteaAppendData(bad, pdfdata + locs[1], sizes[1]);
422 l_byteaAppendData(bad, pdfdata + locs[2], sizes[2]);
423 l_byteaAppendString(bad, str_pages);
424 for (j = 0; j < 4; j++)
425 l_dnaAddNumber(da_outlocs, locs[j]);
427 for (j = 4; j < nobj; j++) {
428 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
429 bat1 = l_byteaInitFromMem(pdfdata + locs[j], sizes[j]);
431 data = l_byteaGetData(bat2, &size);
432 l_byteaAppendData(bad, data, size);
433 l_byteaDestroy(&bat1);
434 l_byteaDestroy(&bat2);
437 l_dnaAddNumber(da_outlocs, l_byteaGetSize(bad));
440 l_dnaDestroy(&da_locs);
441 numaDestroy(&na_objs);
442 l_dnaDestroy(&da_sizes);
446 str_trailer = makeTrailerStringPdf(da_outlocs);
447 l_byteaAppendString(bad, str_trailer);
450 *pdata = l_byteaCopyData(bad, pnbytes);
451 l_byteaDestroy(&bad);
454 lept_stderr(
"******** object mapper **********");
455 numaaWriteStream(stderr, naa_objs);
457 lept_stderr(
"******** Page object numbers ***********");
458 numaWriteStderr(napage);
460 lept_stderr(
"******** Pages object ***********\n");
461 lept_stderr(
"%s\n", str_pages);
464 numaDestroy(&napage);
465 numaaDestroy(&naa_objs);
466 l_dnaDestroy(&da_outlocs);
467 l_dnaaDestroy(&daa_locs);
468 LEPT_FREE(str_pages);
469 LEPT_FREE(str_trailer);
498 if ((fp = fopenReadStream(filein)) == NULL)
499 return ERROR_INT_1(
"file not found", filein, __func__, 1);
500 istiff = fileFormatIsTiff(fp);
503 return ERROR_INT_1(
"file not tiff format", filein, __func__, 1);
505 pixa = pixaReadMultipageTiff(filein);
506 pixaConvertToPdf(pixa, 0, 1.0, 0, 0,
"weasel2", fileout);
553 return ERROR_INT(
"&cid not defined", __func__, 1);
556 return ERROR_INT(
"neither fname nor pix are defined", __func__, 1);
560 if (fname && strcmp(fname,
"-") != 0 && strcmp(fname,
"stdin") != 0) {
561 findFileFormat(fname, &format);
562 if (format == IFF_UNKNOWN)
563 L_WARNING(
"file %s format is unknown\n", __func__, fname);
564 if (format == IFF_PS || format == IFF_LPDF) {
565 L_ERROR(
"file %s is unsupported format %d\n",
566 __func__, fname, format);
569 if (format == IFF_JFIF_JPEG) {
571 }
else if (format == IFF_JP2) {
573 }
else if (format == IFF_PNG) {
581 pixt = pixRead(fname);
583 pixt = pixClone(pix);
585 return ERROR_INT(
"pixt not made", __func__, 1);
586 if (selectDefaultPdfEncoding(pixt, &type)) {
593 return ERROR_INT(
"cid not made from pix", __func__, 1);
631l_int32 format, d, bps, spp, iscmap;
636 return ERROR_INT(
"&cid not defined", __func__, 1);
639 return ERROR_INT(
"fname not defined", __func__, 1);
642 return ERROR_INT(
"invalid conversion type", __func__, 1);
643 if (ascii85 != 0 && ascii85 != 1)
644 return ERROR_INT(
"invalid ascii85", __func__, 1);
647 pixReadHeader(fname, &format, NULL, NULL, &bps, &spp, &iscmap);
651 L_WARNING(
"pixs has cmap; using flate encoding\n", __func__);
654 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
657 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
660 L_WARNING(
"pixs has > 1 bpp; using flate encoding\n", __func__);
665 if (format == IFF_JFIF_JPEG) {
668 if ((pix = pixRead(fname)) == NULL)
669 return ERROR_INT(
"pix not returned for JPEG", __func__, 1);
674 return ERROR_INT(
"jpeg data not made", __func__, 1);
676 if (format == IFF_JP2) {
679 if ((pix = pixRead(fname)) == NULL)
680 return ERROR_INT(
"pix not returned for JP2K", __func__, 1);
685 return ERROR_INT(
"jp2k data not made", __func__, 1);
687 if ((pix = pixRead(fname)) == NULL)
688 return ERROR_INT(
"pix not returned for G4", __func__, 1);
692 return ERROR_INT(
"g4 data not made", __func__, 1);
695 return ERROR_INT(
"flate data not made", __func__, 1);
697 return ERROR_INT(
"invalid conversion type", __func__, 1);
730l_uint8 *pngcomp = NULL;
731l_uint8 *datacomp = NULL;
732l_uint8 *cmapdata = NULL;
733char *cmapdatahex = NULL;
735l_int32 format, interlaced;
739l_int32 w, h, cmapflag;
741size_t nbytescomp = 0, nbytespng = 0;
748 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
750 findFileFormat(fname, &format);
754 if (format == IFF_PNG) {
755 isPngInterlaced(fname, &interlaced);
756 if (readHeaderPng(fname, NULL, NULL, &bps, &spp, NULL))
757 return (
L_COMP_DATA *)ERROR_PTR(
"bad png input", __func__, NULL);
768 if (format != IFF_PNG ||
769 (format == IFF_PNG && (interlaced || bps == 1 || spp == 4 || spp == 2)))
772 pix = pixRead(fname);
774 pix = pixClone(pixs);
776 return (
L_COMP_DATA *)ERROR_PTR(
"pix not made", __func__, NULL);
785 if ((fp = fopenReadStream(fname)) == NULL)
786 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
787 fname, __func__, NULL);
788 freadHeaderPng(fp, &w, &h, &bps, &spp, &cmapflag);
789 fgetPngResolution(fp, &xres, &yres);
797 if ((pngcomp = l_binaryRead(fname, &nbytespng)) == NULL)
798 return (
L_COMP_DATA *)ERROR_PTR_1(
"unable to read file",
799 fname, __func__, NULL);
807 if ((datacomp = (l_uint8 *)LEPT_CALLOC(1, nbytespng)) == NULL) {
809 return (
L_COMP_DATA *)ERROR_PTR(
"unable to allocate memory",
823 for (i = 16; i < nbytespng; i += 12) {
825 n = pngcomp[i - 8] << 24;
826 n += pngcomp[i - 7] << 16;
827 n += pngcomp[i - 6] << 8;
828 n += pngcomp[i - 5] << 0;
829 if (n >= nbytespng - i) {
832 pixcmapDestroy(&cmap);
833 L_ERROR(
"invalid png: i = %d, n = %d, nbytes = %zu\n", __func__,
839 if (memcmp(pngcomp + i - 4,
"IDAT", 4) == 0) {
840 memcpy(datacomp + nbytescomp, pngcomp + i, n);
845 if (cmapflag && !cmap &&
846 memcmp(pngcomp + i - 4,
"PLTE", 4) == 0) {
847 if ((n / 3) > (1 << bps)) {
850 pixcmapDestroy(&cmap);
851 L_ERROR(
"invalid png: i = %d, n = %d, cmapsize = %d\n",
852 __func__, i, n, (1 << bps));
855 cmap = pixcmapCreate(bps);
856 for (j = i; j < i + n; j += 3) {
857 pixcmapAddColor(cmap, pngcomp[j], pngcomp[j + 1],
865 if (nbytescomp == 0) {
867 pixcmapDestroy(&cmap);
868 return (
L_COMP_DATA *)ERROR_PTR(
"invalid PNG file", __func__, NULL);
874 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
875 pixcmapDestroy(&cmap);
878 return (
L_COMP_DATA *)ERROR_PTR(
"cmapdata not made",
881 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
930l_int32 w, h, xres, yres, bps, spp;
931size_t nbytes, nbytes85;
936 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
938 if (ascii85flag != 0 && ascii85flag != 1)
939 return (
L_COMP_DATA *)ERROR_PTR(
"wrong ascii85flags", __func__, NULL);
942 if (readHeaderJpeg(fname, &w, &h, &spp, NULL, NULL))
943 return (
L_COMP_DATA *)ERROR_PTR(
"bad jpeg metadata", __func__, NULL);
945 if ((fp = fopenReadStream(fname)) == NULL)
946 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
947 fname, __func__, NULL);
948 fgetJpegResolution(fp, &xres, &yres);
953 if ((data = l_binaryRead(fname, &nbytes)) == NULL)
954 return (
L_COMP_DATA *)ERROR_PTR_1(
"data not extracted",
955 fname, __func__, NULL);
958 if (ascii85flag == 1) {
959 data85 = encodeAscii85(data, nbytes, &nbytes85);
962 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
964 data85[nbytes85 - 1] =
'\0';
968 if (ascii85flag == 0) {
1003 l_int32 ascii85flag)
1006l_int32 w, h, xres, yres, bps, spp;
1011 return (
L_COMP_DATA *)ERROR_PTR(
"data not defined", __func__, NULL);
1014 if (readHeaderMemJpeg(data, nbytes, &w, &h, &spp, NULL, NULL)) {
1016 return (
L_COMP_DATA *)ERROR_PTR(
"bad jpeg metadata", __func__, NULL);
1019 readResolutionMemJpeg(data, nbytes, &xres, &yres);
1022 if (ascii85flag == 1) {
1023 data85 = encodeAscii85(data, nbytes, &nbytes85);
1026 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
1028 data85[nbytes85 - 1] =
'\0';
1032 if (ascii85flag == 0) {
1063l_int32 w, h, bps, spp, xres, yres;
1069 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1071 if (readHeaderJp2k(fname, &w, &h, &bps, &spp, NULL))
1072 return (
L_COMP_DATA *)ERROR_PTR(
"bad jp2k metadata", __func__, NULL);
1076 if ((cid->
datacomp = l_binaryRead(fname, &nbytes)) == NULL) {
1078 return (
L_COMP_DATA *)ERROR_PTR(
"data not extracted", __func__, NULL);
1082 if ((fp = fopenReadStream(fname)) != NULL) {
1083 fgetJp2kResolution(fp, &xres, &yres);
1114 l_int32 ascii85flag)
1116l_uint8 *datacomp = NULL;
1118l_int32 w, h, xres, yres, npages;
1120size_t nbytes85, nbytescomp;
1125 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1128 if ((fp = fopenReadStream(fname)) == NULL)
1129 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
1130 fname, __func__, NULL);
1131 tiffGetCount(fp, &npages);
1134 L_ERROR(
" %d page tiff; only works with 1 page (file: %s)\n", __func__, npages, fname);
1139 if ((fp = fopenReadStream(fname)) == NULL)
1140 return (
L_COMP_DATA *)ERROR_PTR_1(
"stream not opened",
1141 fname, __func__, NULL);
1142 getTiffResolution(fp, &xres, &yres);
1148 if (extractG4DataFromFile(fname, &datacomp, &nbytescomp,
1149 &w, &h, &minisblack)) {
1150 return (
L_COMP_DATA *)ERROR_PTR_1(
"datacomp not extracted",
1151 fname, __func__, NULL);
1155 if (ascii85flag == 1) {
1156 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1157 LEPT_FREE(datacomp);
1159 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
1161 data85[nbytes85 - 1] =
'\0';
1165 if (ascii85flag == 0) {
1210l_int32 w, h, d, maxAsp;
1214 return ERROR_INT(
"&cid not defined", __func__, 1);
1217 return ERROR_INT(
"pixs not defined", __func__, 1);
1220 selectDefaultPdfEncoding(pixs, &type);
1222 if (ascii85 != 0 && ascii85 != 1)
1223 return ERROR_INT(
"invalid ascii85", __func__, 1);
1224 pixGetDimensions(pixs, &w, &h, NULL);
1225 if (w == 0 || h == 0)
1226 return ERROR_INT(
"invalid w or h", __func__, 1);
1227 maxAsp = L_MAX(w / h, h / w);
1229 return ERROR_INT(
"max asperity > 10", __func__, 1);
1233#if defined(HAVE_LIBZ)
1234# if !defined(HAVE_LIBJPEG)
1236 L_WARNING(
"no libjpeg; using flate encoding\n", __func__);
1240# if !defined(HAVE_LIBJP2K)
1242 L_WARNING(
"no libjp2k; using flate encoding\n", __func__);
1246# if !defined(HAVE_LIBTIFF)
1248 L_WARNING(
"no libtiff; using flate encoding\n", __func__);
1255 d = pixGetDepth(pixs);
1256 cmap = pixGetColormap(pixs);
1258 L_WARNING(
"pixs has cmap; using flate encoding\n", __func__);
1261 L_WARNING(
"pixs has < 8 bpp; using flate encoding\n", __func__);
1264 L_WARNING(
"pixs has > 1 bpp; using flate encoding\n", __func__);
1270 return ERROR_INT(
"jpeg data not made", __func__, 1);
1273 return ERROR_INT(
"jp2k data not made", __func__, 1);
1276 return ERROR_INT(
"g4 data not made", __func__, 1);
1279 return ERROR_INT(
"flate data not made", __func__, 1);
1307 l_int32 ascii85flag)
1313 return (
L_COMP_DATA *)ERROR_PTR(
"fname not defined", __func__, NULL);
1315 if ((pixs = pixRead(fname)) == NULL)
1316 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not made", __func__, NULL);
1342 l_int32 ascii85flag)
1344l_uint8 *data = NULL;
1345l_uint8 *datacomp = NULL;
1347l_uint8 *cmapdata = NULL;
1348char *cmapdata85 = NULL;
1349char *cmapdatahex = NULL;
1353l_int32 w, h, d, cmapflag;
1354size_t ncmapbytes85 = 0;
1356size_t nbytes, nbytescomp;
1362 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1369 pixGetDimensions(pixs, &w, &h, &d);
1370 cmap = pixGetColormap(pixs);
1371 cmapflag = (cmap) ? 1 : 0;
1372 if (d == 2 || d == 4 || d == 16) {
1373 pixt = pixConvertTo8(pixs, cmapflag);
1374 cmap = pixGetColormap(pixt);
1375 d = pixGetDepth(pixt);
1376 }
else if (d == 32 && pixGetSpp(pixs) == 4) {
1377 pixt = pixAlphaBlendUniform(pixs, 0xffffff00);
1379 pixt = pixClone(pixs);
1382 return (
L_COMP_DATA *)ERROR_PTR(
"pixt not made", __func__, NULL);
1383 spp = (d == 32) ? 3 : 1;
1384 bps = (d == 32) ? 8 : d;
1389 pixcmapSerializeToMemory(cmap, 3, &ncolors, &cmapdata);
1392 return (
L_COMP_DATA *)ERROR_PTR(
"cmapdata not made",
1396 cmapdata85 = encodeAscii85(cmapdata, 3 * ncolors, &ncmapbytes85);
1397 cmapdatahex = pixcmapConvertToHex(cmapdata, ncolors);
1398 LEPT_FREE(cmapdata);
1402 pixGetRasterData(pixt, &data, &nbytes);
1405 LEPT_FREE(cmapdata85);
1406 LEPT_FREE(cmapdatahex);
1407 return (
L_COMP_DATA *)ERROR_PTR(
"data not returned", __func__, NULL);
1409 datacomp = zlibCompress(data, nbytes, &nbytescomp);
1412 LEPT_FREE(cmapdata85);
1413 LEPT_FREE(cmapdatahex);
1414 return (
L_COMP_DATA *)ERROR_PTR(
"datacomp not made", __func__, NULL);
1418 if (ascii85flag == 1) {
1419 data85 = encodeAscii85(datacomp, nbytescomp, &nbytes85);
1420 LEPT_FREE(datacomp);
1422 LEPT_FREE(cmapdata85);
1423 LEPT_FREE(cmapdatahex);
1424 return (
L_COMP_DATA *)ERROR_PTR(
"data85 not made", __func__, NULL);
1426 data85[nbytes85 - 1] =
'\0';
1431 if (ascii85flag == 0) {
1446 cid->
res = pixGetXRes(pixs);
1470 l_int32 ascii85flag,
1478 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1479 if (pixGetColormap(pixs))
1480 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1481 d = pixGetDepth(pixs);
1482 if (d != 8 && d != 16 && d != 32)
1483 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 8, 16 or 32 bpp",
1487 fname = l_makeTempFilename();
1488 if (pixWriteJpeg(fname, pixs, quality, 0)) {
1495 if (lept_rmfile(fname) != 0)
1496 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1525 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1526 if (pixGetColormap(pixs))
1527 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1528 d = pixGetDepth(pixs);
1529 if (d != 8 && d != 32)
1530 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 8 or 32 bpp", __func__, NULL);
1533 fname = l_makeTempFilename();
1534 if (pixWriteJp2k(fname, pixs, quality, 5, 0, 0)) {
1541 if (lept_rmfile(fname) != 0)
1542 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1564 l_int32 ascii85flag)
1570 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not defined", __func__, NULL);
1571 if (pixGetDepth(pixs) != 1)
1572 return (
L_COMP_DATA *)ERROR_PTR(
"pixs not 1 bpp", __func__, NULL);
1573 if (pixGetColormap(pixs))
1574 return (
L_COMP_DATA *)ERROR_PTR(
"pixs has colormap", __func__, NULL);
1577 fname = l_makeTempFilename();
1578 if (pixWrite(fname, pixs, IFF_TIFF_G4)) {
1584 if (lept_rmfile(fname) != 0)
1585 L_ERROR(
"temp file %s was not deleted\n", __func__, fname);
1616 if (!pdata || !pnbytes)
1617 return ERROR_INT(
"&data and &nbytes not both defined", __func__, 1);
1621 return ERROR_INT(
"cid not defined", __func__, 1);
1626 res = DefaultInputRes;
1627 wpt = cid->
w * 72.f / res;
1628 hpt = cid->
h * 72.f / res;
1631 if ((lpd = pdfdataCreate(title)) == NULL)
1632 return ERROR_INT(
"lpd not made", __func__, 1);
1633 ptraAdd(lpd->
cida, cid);
1635 ptaAddPt(lpd->
xy, 0, 0);
1636 ptaAddPt(lpd->
wh, wpt, hpt);
1640 pdfdataDestroy(&lpd);
1642 return ERROR_INT(
"pdf output not made", __func__, 1);
1659 L_WARNING(
"ptr address is null!\n", __func__);
1662 if ((cid = *pcid) == NULL)
1704 return ERROR_INT(
"&data not defined", __func__, 1);
1707 return ERROR_INT(
"&nbytes not defined", __func__, 1);
1710 return ERROR_INT(
"lpd not defined", __func__, 1);
1712 generateFixedStringsPdf(lpd);
1713 generateMediaboxPdf(lpd);
1714 generatePageStringPdf(lpd);
1715 generateContentStringPdf(lpd);
1716 generatePreXStringsPdf(lpd);
1717 generateColormapStringsPdf(lpd);
1718 generateTrailerPdf(lpd);
1726char buf[L_SMALLBUF];
1727char *version, *datestr;
1731 lpd->
id = stringNew(
"%PDF-1.5\n");
1732 l_dnaAddNumber(lpd->
objsize, strlen(lpd->
id));
1734 lpd->
obj1 = stringNew(
"1 0 obj\n"
1742 sa = sarrayCreate(0);
1743 sarrayAddString(sa,
"2 0 obj\n"
1745 if (var_WRITE_DATE_AND_VERSION) {
1746 datestr = l_getFormattedDate();
1747 snprintf(buf,
sizeof(buf),
"/CreationDate (D:%s)\n", datestr);
1748 sarrayAddString(sa, buf,
L_COPY);
1750 version = getLeptonicaVersion();
1751 snprintf(buf,
sizeof(buf),
1752 "/Producer (leptonica: %s)\n", version);
1755 snprintf(buf,
sizeof(buf),
"/Producer (leptonica)\n");
1757 sarrayAddString(sa, buf,
L_COPY);
1761 snprintf(buf,
sizeof(buf),
"/Title %s\n", hexstr);
1762 sarrayAddString(sa, buf,
L_COPY);
1764 L_ERROR(
"title string is not ascii\n", __func__);
1768 sarrayAddString(sa,
">>\n"
1770 lpd->
obj2 = sarrayToString(sa, 0);
1774 lpd->
obj3 = stringNew(
"3 0 obj\n"
1811l_int32 i, nchar, buflen;
1814 return (
char *)ERROR_PTR(
"str not defined", __func__, NULL);
1815 nchar = strlen(str);
1816 for (i = 0; i < nchar; i++) {
1818 return (
char *)ERROR_PTR(
"str not all ascii", __func__, NULL);
1821 buflen = 4 * nchar + 10;
1822 buffer = (
char *)LEPT_CALLOC(buflen,
sizeof(
char));
1823 stringCat(buffer, buflen,
"<feff");
1824 for (i = 0; i < nchar; i++) {
1825 snprintf(smallbuf,
sizeof(smallbuf),
"%04x", str[i]);
1826 stringCat(buffer, buflen, smallbuf);
1828 stringCat(buffer, buflen,
">");
1837l_float32 xpt, ypt, wpt, hpt, maxx, maxy;
1842 for (i = 0; i < lpd->
n; i++) {
1843 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1844 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1845 maxx = L_MAX(maxx, xpt + wpt);
1846 maxy = L_MAX(maxy, ypt + hpt);
1849 lpd->
mediabox = boxCreate(0, 0, (l_int32)(maxx + 0.5),
1850 (l_int32)(maxy + 0.5));
1856 for (i = 0; i < lpd->
n; i++) {
1857 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1858 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1859 ptaSetPt(lpd->
xy, i, xpt, maxy - ypt - hpt);
1869l_int32 bufsize, i, wpt, hpt;
1875 bufsize = 1000 + 50 * lpd->
n;
1876 if ((buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char))) == NULL)
1877 return ERROR_INT(
"calloc fail for buf", __func__, 1);
1879 boxGetGeometry(lpd->
mediabox, NULL, NULL, &wpt, &hpt);
1880 sa = sarrayCreate(lpd->
n);
1881 for (i = 0; i < lpd->
n; i++) {
1882 snprintf(buf, bufsize,
"/Im%d %d 0 R ", i + 1, 6 + i);
1883 sarrayAddString(sa, buf,
L_COPY);
1885 xstr = sarrayToString(sa, 0);
1889 return ERROR_INT(
"xstr not made", __func__, 1);
1892 snprintf(buf, bufsize,
"4 0 obj\n"
1896 "/MediaBox [%d %d %d %d]\n"
1900 "/XObject << %s >>\n"
1901 "/ProcSet [ /ImageB /ImageI /ImageC ]\n"
1905 0, 0, wpt, hpt, xstr);
1907 lpd->
obj4 = stringNew(buf);
1922l_float32 xpt, ypt, wpt, hpt;
1925 bufsize = 1000 + 200 * lpd->
n;
1926 if ((buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char))) == NULL)
1927 return ERROR_INT(
"calloc fail for buf", __func__, 1);
1929 sa = sarrayCreate(lpd->
n);
1930 for (i = 0; i < lpd->
n; i++) {
1931 ptaGetPt(lpd->
xy, i, &xpt, &ypt);
1932 ptaGetPt(lpd->
wh, i, &wpt, &hpt);
1933 snprintf(buf, bufsize,
1934 "q %.4f %.4f %.4f %.4f %.4f %.4f cm /Im%d Do Q\n",
1935 wpt, 0.0, 0.0, hpt, xpt, ypt, i + 1);
1936 sarrayAddString(sa, buf,
L_COPY);
1938 cstr = sarrayToString(sa, 0);
1942 return ERROR_INT(
"cstr not made", __func__, 1);
1945 snprintf(buf, bufsize,
"5 0 obj\n"
1946 "<< /Length %d >>\n"
1951 (l_int32)strlen(cstr), cstr);
1953 lpd->
obj5 = stringNew(buf);
1967char *cstr, *bstr, *fstr, *pstr, *xstr, *photometry;
1973 cmindex = 6 + lpd->
n;
1974 for (i = 0; i < lpd->
n; i++) {
1976 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
1977 return ERROR_INT(
"cid not found", __func__, 1);
1980 if (var_WRITE_G4_IMAGE_MASK) {
1981 cstr = stringNew(
"/ImageMask true\n"
1982 "/ColorSpace /DeviceGray");
1984 cstr = stringNew(
"/ColorSpace /DeviceGray");
1986 bstr = stringNew(
"/BitsPerComponent 1\n"
1987 "/Interpolate true");
1994 photometry = (cid->
minisblack) ? stringNew(
"true")
1995 : stringNew(
"false");
1996 snprintf(buff,
sizeof(buff),
1997 "/Filter /CCITTFaxDecode\n"
2003 ">>", photometry, cid->
w);
2004 fstr = stringNew(buff);
2005 LEPT_FREE(photometry);
2008 cstr = stringNew(
"/ColorSpace /DeviceGray");
2009 else if (cid->
spp == 3)
2010 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2011 else if (cid->
spp == 4)
2012 cstr = stringNew(
"/ColorSpace /DeviceCMYK");
2014 L_ERROR(
"in jpeg: spp != 1, 3 or 4\n", __func__);
2015 bstr = stringNew(
"/BitsPerComponent 8");
2016 fstr = stringNew(
"/Filter /DCTDecode");
2019 cstr = stringNew(
"/ColorSpace /DeviceGray");
2020 else if (cid->
spp == 3)
2021 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2023 L_ERROR(
"in jp2k: spp != 1 && spp != 3\n", __func__);
2024 bstr = stringNew(
"/BitsPerComponent 8");
2025 fstr = stringNew(
"/Filter /JPXDecode");
2028 snprintf(buff,
sizeof(buff),
"/ColorSpace %d 0 R", cmindex++);
2029 cstr = stringNew(buff);
2031 if (cid->
spp == 1 && cid->
bps == 1)
2032 cstr = stringNew(
"/ColorSpace /DeviceGray\n"
2034 else if (cid->
spp == 1)
2035 cstr = stringNew(
"/ColorSpace /DeviceGray");
2036 else if (cid->
spp == 3)
2037 cstr = stringNew(
"/ColorSpace /DeviceRGB");
2039 L_ERROR(
"unknown colorspace: spp = %d\n",
2040 __func__, cid->
spp);
2042 snprintf(buff,
sizeof(buff),
"/BitsPerComponent %d", cid->
bps);
2043 bstr = stringNew(buff);
2044 fstr = stringNew(
"/Filter /FlateDecode");
2046 snprintf(buff,
sizeof(buff),
2052 " /BitsPerComponent %d\n"
2053 ">>\n", cid->
w, cid->
spp, cid->
bps);
2054 pstr = stringNew(buff);
2058 pstr = stringNew(
"");
2060 snprintf(buf,
sizeof(buf),
2074 cid->
w, cid->
h, bstr, fstr, pstr);
2075 xstr = stringNew(buf);
2076 sarrayAddString(sa, xstr,
L_INSERT);
2094l_int32 i, cmindex, ncmap;
2102 cmindex = 6 + lpd->
n;
2104 for (i = 0; i < lpd->
n; i++) {
2105 if ((cid = pdfdataGetCid(lpd, i)) == NULL)
2106 return ERROR_INT(
"cid not found", __func__, 1);
2107 if (cid->
ncolors == 0)
continue;
2110 snprintf(buf,
sizeof(buf),
"%d 0 obj\n"
2111 "[ /Indexed /DeviceRGB\n"
2118 cmstr = stringNew(buf);
2119 l_dnaAddNumber(lpd->
objsize, strlen(cmstr));
2120 sarrayAddString(sa, cmstr,
L_INSERT);
2131l_int32 i, n, size, linestart;
2132L_DNA *daloc, *dasize;
2155 l_dnaAddNumber(daloc, linestart);
2156 n = l_dnaGetCount(dasize);
2157 for (i = 0; i < n; i++) {
2158 l_dnaGetIValue(dasize, i, &size);
2160 l_dnaAddNumber(daloc, linestart);
2162 l_dnaGetIValue(daloc, n, &lpd->
xrefloc);
2165 lpd->
trailer = makeTrailerStringPdf(daloc);
/*
 * makeTrailerStringPdf()
 *
 *   daloc:  array of integer byte locations; entries 1 .. count-2 are
 *           emitted as xref lines and the last entry (index count-1) is
 *           read as the xref location
 *
 *   The pieces are accumulated in an sarray and joined into outstr with
 *   sarrayToString().  On an undefined daloc the error path yields NULL
 *   through ERROR_PTR.
 *
 *   NOTE(review): this excerpt omits lines of the function (local
 *   declarations, the daloc test, closing braces, cleanup and the return
 *   statement), so ownership of the result is not established here.
 */
2170makeTrailerStringPdf(
L_DNA *daloc)
2174l_int32 i, n, linestart, xrefloc;
2178 return (
char *)ERROR_PTR(
"daloc not defined", __func__, NULL);
/* n is one less than the number of entries, i.e. the index of the last
 * entry, which holds the xref location rather than an object location. */
2179 n = l_dnaGetCount(daloc) - 1;
2181 sa = sarrayCreate(0);
/* Table header plus the fixed entry "0000000000 65535 f".  n is passed as
 * the format argument; the format line that consumes it is not visible. */
2182 snprintf(buf,
sizeof(buf),
"xref\n"
2184 "0000000000 65535 f \n", n);
2185 sarrayAddString(sa, buf,
L_COPY);
/* One line per location at indices 1 .. n-1, each written as a
 * zero-padded 10-digit decimal followed by " 00000 n ". */
2186 for (i = 1; i < n; i++) {
2187 l_dnaGetIValue(daloc, i, &linestart);
2188 snprintf(buf,
sizeof(buf),
"%010d 00000 n \n", linestart);
2189 sarrayAddString(sa, buf,
L_COPY);
/* The last entry supplies xrefloc; n and xrefloc are the two arguments of
 * the trailer format.  "%%%%EOF\n" prints as the literal "%%EOF" line. */
2192 l_dnaGetIValue(daloc, n, &xrefloc);
2193 snprintf(buf,
sizeof(buf),
"trailer\n"
2201 "%%%%EOF\n", n, xrefloc);
2202 sarrayAddString(sa, buf,
L_COPY);
/* Join all accumulated pieces into one string. */
2203 outstr = sarrayToString(sa, 0);
2229l_int32 nimages, i, len;
2230l_int32 *sizes, *locs;
2235 return ERROR_INT(
"&data not defined", __func__, 1);
2238 return ERROR_INT(
"&nbytes not defined", __func__, 1);
2241 if ((data = (l_uint8 *)LEPT_CALLOC(nbytes,
sizeof(l_uint8))) == NULL)
2242 return ERROR_INT(
"calloc fail for data", __func__, 1);
2245 sizes = l_dnaGetIArray(lpd->
objsize);
2246 locs = l_dnaGetIArray(lpd->
objloc);
2247 memcpy(data, lpd->
id, sizes[0]);
2248 memcpy(data + locs[1], lpd->
obj1, sizes[1]);
2249 memcpy(data + locs[2], lpd->
obj2, sizes[2]);
2250 memcpy(data + locs[3], lpd->
obj3, sizes[3]);
2251 memcpy(data + locs[4], lpd->
obj4, sizes[4]);
2252 memcpy(data + locs[5], lpd->
obj5, sizes[5]);
2257 for (i = 0; i < nimages; i++) {
2258 if ((cid = pdfdataGetCid(lpd, i)) == NULL) {
2261 return ERROR_INT(
"cid not found", __func__, 1);
2265 memcpy(data + locs[6 + i], str, len);
2266 memcpy(data + locs[6 + i] + len,
2268 memcpy(data + locs[6 + i] + len + cid->
nbytescomp,
2273 for (i = 0; i < lpd->
ncmap; i++) {
2275 memcpy(data + locs[6 + nimages + i], str, strlen(str));
2303l_int32 i, j, start, startloc, xrefloc, found, loc, nobj, objno, trailer_ok;
2305L_DNA *da, *daobj, *daxref;
2309 return ERROR_INT(
"&da not defined", __func__, 1);
2312 return ERROR_INT(
"bas not defined", __func__, 1);
2313 data = l_byteaGetData(bas, &size);
2314 if (memcmp(data,
"%PDF-1.", 7) != 0)
2315 return ERROR_INT(
"PDF header signature not found", __func__, 1);
2321 arrayFindSequence(data + start, size - start,
2322 (l_uint8 *)
"startxref\n", 10, &loc, &found);
2324 return ERROR_INT(
"startxref not found!", __func__, 1);
2325 if (sscanf((
char *)(data + start + loc + 10),
"%d\n", &xrefloc) != 1)
2326 return ERROR_INT(
"xrefloc not found!", __func__, 1);
2327 if (xrefloc < 0 || xrefloc >= size)
2328 return ERROR_INT(
"invalid xrefloc!", __func__, 1);
2329 sa = sarrayCreateLinesFromString((
char *)(data + xrefloc), 0);
2330 str = sarrayGetString(sa, 1,
L_NOCOPY);
2331 if ((sscanf(str,
"0 %d", &nobj)) != 1) {
2333 return ERROR_INT(
"nobj not found", __func__, 1);
2338 da = l_dnaCreate(nobj + 1);
2340 for (i = 0; i < nobj; i++) {
2341 str = sarrayGetString(sa, i + 2,
L_NOCOPY);
2342 sscanf(str,
"%d", &startloc);
2343 l_dnaAddNumber(da, startloc);
2345 l_dnaAddNumber(da, xrefloc);
2348 lept_stderr(
"************** Trailer string ************\n");
2349 lept_stderr(
"xrefloc = %d", xrefloc);
2350 sarrayWriteStderr(sa);
2352 lept_stderr(
"************** Object locations ************");
2353 l_dnaWriteStderr(da);
2359 for (i = 1; i < nobj; i++) {
2360 l_dnaGetIValue(da, i, &startloc);
2361 if ((sscanf((
char *)(data + startloc),
"%d 0 obj", &objno)) != 1) {
2362 L_ERROR(
"bad trailer for object %d\n", __func__, i);
2370 L_INFO(
"rebuilding pdf trailer\n", __func__);
2372 l_dnaAddNumber(da, 0);
2373 l_byteaFindEachSequence(bas, (l_uint8 *)
" 0 obj\n", 7, &daobj);
2374 nobj = l_dnaGetCount(daobj);
2375 for (i = 0; i < nobj; i++) {
2376 l_dnaGetIValue(daobj, i, &loc);
2377 for (j = loc - 1; j > 0; j--) {
2381 l_dnaAddNumber(da, j + 1);
2383 l_byteaFindEachSequence(bas, (l_uint8 *)
"xref", 4, &daxref);
2384 l_dnaGetIValue(daxref, 0, &loc);
2385 l_dnaAddNumber(da, loc);
2386 l_dnaDestroy(&daobj);
2387 l_dnaDestroy(&daxref);
/*
 * generatePagesObjStringPdf()
 *
 *   napage:  numa whose entries are read as integers and written out as
 *            " <index> 0 R " references
 *
 *   On an undefined napage the error path yields NULL through ERROR_PTR.
 *
 *   NOTE(review): this excerpt omits lines of the function, including the
 *   rest of the final snprintf, cleanup and the return statement.
 */
2395generatePagesObjStringPdf(
NUMA *napage)
2399l_int32 i, n, index, bufsize;
2403 return (
char *)ERROR_PTR(
"napage not defined", __func__, NULL);
2405 n = numaGetCount(napage);
/* Scratch buffer sized as a fixed 100 bytes plus 16 bytes per entry.
 * NOTE(review): no NULL check of buf is visible before its use below. */
2406 bufsize = 100 + 16 * n;
2407 buf = (
char *)LEPT_CALLOC(bufsize,
sizeof(
char));
2408 sa = sarrayCreate(n);
/* Format each entry as a reference and store a copy of it in sa. */
2409 for (i = 0; i < n; i++) {
2410 numaGetIValue(napage, i, &index);
2411 snprintf(buf, bufsize,
" %d 0 R ", index);
2412 sarrayAddString(sa, buf,
L_COPY);
/* Join the references; buf is then reused (limited to bufsize - 1) for
 * text that begins "3 0 obj". */
2415 str = sarrayToString(sa, 0);
2416 snprintf(buf, bufsize - 1,
"3 0 obj\n"
2454l_int32 start, nrepl, i, j, nobjs, objin, objout, found;
2455l_int32 *objs, *matches;
2461 return (
L_BYTEA *)ERROR_PTR(
"bas not defined", __func__, NULL);
2463 return (
L_BYTEA *)ERROR_PTR(
"na_objs not defined", __func__, NULL);
2465 datas = l_byteaGetData(bas, &size);
2466 bad = l_byteaCreate(100);
2467 objs = numaGetIArray(na_objs);
2468 nobjs = numaGetCount(na_objs);
2471 sscanf((
char *)datas,
"%d", &objin);
2472 if (objin < 0 || objin >= nobjs) {
2473 L_ERROR(
"index %d into array of size %d\n", __func__, objin, nobjs);
2477 objout = objs[objin];
2478 snprintf((
char *)buf, 32,
"%d", objout);
2479 l_byteaAppendString(bad, (
char *)buf);
2482 arrayFindSequence(datas, size, &space, 1, &start, &found);
2483 da_match = arrayFindEachSequence(datas, size, (l_uint8 *)
" 0 R", 4);
2485 l_byteaAppendData(bad, datas + start, size - start);
2491 nrepl = l_dnaGetCount(da_match);
2492 matches = l_dnaGetIArray(da_match);
2493 for (i = 0; i < nrepl; i++) {
2495 for (j = matches[i] - 1; j > 0; j--) {
2496 if (datas[j] == space)
2500 l_byteaAppendData(bad, datas + start, j - start + 1);
2501 sscanf((
char *)(datas + j + 1),
"%d", &objin);
2502 if (objin < 0 || objin >= nobjs) {
2503 L_ERROR(
"index %d into array of size %d\n", __func__, objin, nobjs);
2506 l_dnaDestroy(&da_match);
2509 objout = objs[objin];
2510 snprintf((
char *)buf, 32,
"%d", objout);
2511 l_byteaAppendString(bad, (
char *)buf);
2514 l_byteaAppendData(bad, datas + start, size - start);
2518 l_dnaDestroy(&da_match);
/*
 * pdfdataCreate()
 *
 *   title:  optional; copied with stringNew() only when non-NULL
 *
 *   Creates the containers held by the L_PDF_DATA: a ptra (cida), two
 *   pta (xy, wh), two sarray (saprex, sacmap) and two l_dna (objsize,
 *   objloc).  The numeric arguments are the initial sizes handed to the
 *   respective create functions.
 *
 *   NOTE(review): the allocation of lpd itself and the return statement
 *   are not visible in this excerpt.
 */
2527pdfdataCreate(
const char *title)
2532 if (title) lpd->
title = stringNew(title);
2533 lpd->
cida = ptraCreate(10);
2534 lpd->
xy = ptaCreate(10);
2535 lpd->
wh = ptaCreate(10);
2536 lpd->
saprex = sarrayCreate(10);
2537 lpd->
sacmap = sarrayCreate(10);
2538 lpd->
objsize = l_dnaCreate(20);
2539 lpd->
objloc = l_dnaCreate(20);
2551 L_WARNING(
"ptr address is null!\n", __func__);
2554 if ((lpd = *plpd) == NULL)
2558 for (i = 0; i < lpd->
n; i++) {
2563 ptraDestroy(&lpd->
cida, 0, 0);
2564 if (lpd->
id) LEPT_FREE(lpd->
id);
2565 if (lpd->
obj1) LEPT_FREE(lpd->
obj1);
2566 if (lpd->
obj2) LEPT_FREE(lpd->
obj2);
2567 if (lpd->
obj3) LEPT_FREE(lpd->
obj3);
2568 if (lpd->
obj4) LEPT_FREE(lpd->
obj4);
2569 if (lpd->
obj5) LEPT_FREE(lpd->
obj5);
2572 if (lpd->
xy) ptaDestroy(&lpd->
xy);
2573 if (lpd->
wh) ptaDestroy(&lpd->
wh);
2589 return (
L_COMP_DATA *)ERROR_PTR(
"lpd not defined", __func__, NULL);
2590 if (index < 0 || index >= lpd->
n)
2591 return (
L_COMP_DATA *)ERROR_PTR(
"invalid image index", __func__, NULL);
2623l_int32 format, loc, ret, npages, found;
2627 return ERROR_INT(
"&npages not defined", __func__, 1);
2630 return ERROR_INT(
"fname not defined", __func__, 1);
2633 findFileFormat(fname, &format);
2634 if (format != IFF_LPDF)
2635 return ERROR_INT(
"file is not pdf", __func__, 1);
2638 if ((data = l_binaryReadSelect(fname, 0, 10000, &nread))
2640 return ERROR_INT(
"partial data not read", __func__, 1);
2645 arrayFindSequence(data, nread, (
const l_uint8 *)
"/Count",
2646 strlen(
"/Count"), &loc, &found);
2648 lept_stderr(
"Reading entire file looking for '/Count'\n");
2650 if ((data = l_binaryRead(fname, &nread)) == NULL)
2651 return ERROR_INT(
"full data not read", __func__, 1);
2652 arrayFindSequence(data, nread, (
const l_uint8 *)
"/Count",
2653 strlen(
"/Count"), &loc, &found);
2656 L_WARNING(
"/Count not found\n", __func__);
2662 if (nread - loc < 12) {
2664 return ERROR_INT(
"data may not include page count field", __func__, 1);
2668 ret = sscanf((
char *)&data[loc],
"/Count %d", &npages);
2671 return ERROR_INT(
"npages not found", __func__, 1);
2710l_int32 i, nw, nh, format, ret, loc, width, height;
2718 if (pnaw) *pnaw = NULL;
2719 if (pnah) *pnah = NULL;
2720 if (pmedw) *pmedw = 0;
2721 if (pmedh) *pmedh = 0;
2722 if (!pnaw && !pnah && !pmedw && !pmedh)
2723 return ERROR_INT(
"no output requested", __func__, 1);
2725 return ERROR_INT(
"fname not defined", __func__, 1);
2728 findFileFormat(fname, &format);
2729 if (format != IFF_LPDF)
2730 return ERROR_INT(
"file is not pdf", __func__, 1);
2734 if ((data = l_binaryRead(fname, &nread)) == NULL)
2735 return ERROR_INT(
"full data not read", __func__, 1);
2736 dnaw = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/Width",
2738 dnah = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/Height",
2741 L_WARNING(
"unable to find widths\n", __func__);
2743 L_WARNING(
"unable to find heights\n", __func__);
2744 if (!dnaw && !dnah) {
2746 L_WARNING(
"no fields found\n", __func__);
2751 nw = l_dnaGetCount(dnaw);
2752 naw = numaCreate(nw);
2753 for (i = 0; i < nw; i++) {
2754 l_dnaGetIValue(dnaw, i, &loc);
2755 ret = sscanf((
char *)&data[loc],
"/Width %d", &width);
2757 L_ERROR(
"width not found for item %d at loc %d\n",
2761 numaAddNumber(naw, width);
2763 nh = l_dnaGetCount(dnah);
2764 nah = numaCreate(nh);
2765 for (i = 0; i < nh; i++) {
2766 l_dnaGetIValue(dnah, i, &loc);
2767 ret = sscanf((
char *)&data[loc],
"/Height %d", &height);
2769 L_ERROR(
"height not found for item %d at loc %d\n",
2773 numaAddNumber(nah, height);
2777 l_dnaDestroy(&dnaw);
2778 l_dnaDestroy(&dnah);
2780 numaGetMedian(naw, &fval);
2781 *pmedw = lept_roundftoi(fval);
2788 numaGetMedian(nah, &fval);
2789 *pmedh = lept_roundftoi(fval);
2833l_int32 i, n, format, ret, loc;
2834l_float32 fval, ignore1, ignore2, w, h;
2840 if (pnaw) *pnaw = NULL;
2841 if (pnah) *pnah = NULL;
2842 if (pmedw) *pmedw = 0;
2843 if (pmedh) *pmedh = 0;
2844 if (!pnaw && !pnah && !pmedw && !pmedh)
2845 return ERROR_INT(
"no output requested", __func__, 1);
2847 return ERROR_INT(
"fname not defined", __func__, 1);
2850 findFileFormat(fname, &format);
2851 if (format != IFF_LPDF)
2852 return ERROR_INT(
"file is not pdf", __func__, 1);
2855 if ((data = l_binaryRead(fname, &nread)) == NULL)
2856 return ERROR_INT(
"full data not read", __func__, 1);
2857 dna = arrayFindEachSequence(data, nread, (
const l_uint8 *)
"/MediaBox",
2858 strlen(
"/MediaBox"));
2861 L_WARNING(
"no mediaboxes found\n", __func__);
2866 n = l_dnaGetCount(dna);
2867 naw = numaCreate(n);
2868 nah = numaCreate(n);
2869 for (i = 0; i < n; i++) {
2870 l_dnaGetIValue(dna, i, &loc);
2871 ret = sscanf((
char *)&data[loc],
"/MediaBox [ %f %f %f %f",
2872 &ignore1, &ignore2, &w, &h);
2874 L_ERROR(
"mediabox sizes not found for item %d at loc %d\n",
2878 numaAddNumber(naw, w);
2879 numaAddNumber(nah, h);
2885 numaGetMedian(naw, &fval);
2886 *pmedw = lept_roundftoi(fval);
2887 if (*pmedw > 850) lept_stderr(
"oversize width: %d\n", *pmedw);
2894 numaGetMedian(nah, &fval);
2895 *pmedh = lept_roundftoi(fval);
2896 if (*pmedh > 850) lept_stderr(
"oversize height: %d\n", *pmedh);
2943char *tail, *basename, *fname;
2944l_int32 ret, res, medw, medh, medmax, npages, pageno, w, h;
2948 return ERROR_INT(
"&res not defined", __func__, 1);
2952 L_INFO(
"Requires pdftoppm, so this is disabled on windows.\n"
2953 "Returns default resolution 300 ppi", __func__);
2958 L_INFO(
"Running pdftoppm is disabled; "
2959 "use setLeptDebugOK(1) to enable\n",
2960 "returns default resolution 300 ppi\n", __func__);
2965 return ERROR_INT(
"infile not defined", __func__, 1);
2967 return ERROR_INT(
"outdir not defined", __func__, 1);
2972 lept_stderr(
"Media Box medians: medw = %d, medh = %d\n", medw, medh);
2973 medmax = L_MAX(medw, medh);
2975 res = 300 * ((l_float32)792 / (l_float32)medmax);
2976 lept_stderr(
" Oversize media box; use resolution = %d\n", res);
2983 lept_stderr(
"Media Box dimensions not found\n");
2985 pageno = (npages > 0) ? (npages + 1) / 2 : 1;
2986 splitPathAtDirectory(infile, NULL, &tail);
2987 splitPathAtExtension(tail, &basename, NULL);
2988 snprintf(buf,
sizeof(buf),
"pdftoppm -f %d -l %d -r 72 %s %s/%s",
2989 pageno, pageno, infile, outdir, basename);
2991 LEPT_FREE(basename);
2992 callSystemDebug(buf);
2995 sa = getSortedPathnamesInDirectory(outdir, NULL, 0, 0);
2996 fname = sarrayGetString(sa, 0,
L_NOCOPY);
2997 pixReadHeader(fname, NULL, &w, &h, NULL, NULL, NULL);
2999 if (w > 0 && h > 0) {
3000 res = L_MIN((72 * 3300 / L_MAX(w, h)), 600);
3002 lept_stderr(
"Use resolution = %d\n", res);
3004 L_ERROR(
"page size not found; assuming res = 300\n", __func__);
3031 var_WRITE_G4_IMAGE_MASK = flag;
3051 var_WRITE_DATE_AND_VERSION = flag;
void l_CIDataDestroy(L_COMP_DATA **pcid)
l_CIDataDestroy()
L_COMP_DATA * l_generateJpegDataMem(l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
l_generateJpegDataMem()
l_ok pixGenerateCIData(PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
pixGenerateCIData()
l_ok getPdfRendererResolution(const char *infile, const char *outdir, l_int32 *pres)
getPdfRendererResolution()
static L_COMP_DATA * pixGenerateFlateData(PIX *pixs, l_int32 ascii85flag)
pixGenerateFlateData()
L_COMP_DATA * l_generateFlateData(const char *fname, l_int32 ascii85flag)
l_generateFlateData()
l_ok pixConvertToPdfData(PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
pixConvertToPdfData()
L_COMP_DATA * l_generateJpegData(const char *fname, l_int32 ascii85flag)
l_generateJpegData()
static char * generateEscapeString(const char *str)
generateEscapeString()
L_COMP_DATA * l_generateG4Data(const char *fname, l_int32 ascii85flag)
l_generateG4Data()
static L_COMP_DATA * pixGenerateJpegData(PIX *pixs, l_int32 ascii85flag, l_int32 quality)
pixGenerateJpegData()
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
static L_BYTEA * substituteObjectNumbers(L_BYTEA *bas, NUMA *na_objs)
substituteObjectNumbers()
l_ok getPdfMediaBoxSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfMediaBoxSizes()
l_ok getPdfPageCount(const char *fname, l_int32 *pnpages)
getPdfPageCount()
l_ok convertTiffMultipageToPdf(const char *filein, const char *fileout)
convertTiffMultipageToPdf()
l_ok l_generateCIData(const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
l_generateCIData()
l_ok ptraConcatenatePdfToData(L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
ptraConcatenatePdfToData()
static l_int32 parseTrailerPdf(L_BYTEA *bas, L_DNA **pda)
parseTrailerPdf()
static L_COMP_DATA * pixGenerateG4Data(PIX *pixs, l_int32 ascii85flag)
pixGenerateG4Data()
l_ok getPdfPageSizes(const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
getPdfPageSizes()
void l_pdfSetG4ImageMask(l_int32 flag)
l_pdfSetG4ImageMask()
l_ok cidConvertToPdfData(L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
cidConvertToPdfData()
static L_COMP_DATA * l_generateJp2kData(const char *fname)
l_generateJp2kData()
L_COMP_DATA * l_generateFlateDataPdf(const char *fname, PIX *pixs)
l_generateFlateDataPdf()
static l_int32 l_generatePdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
l_generatePdf()
l_ok l_generateCIDataForPdf(const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
l_generateCIDataForPdf()
static l_int32 generateOutputDataPdf(l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
generateOutputDataPdf()
static L_COMP_DATA * pixGenerateJp2kData(PIX *pixs, l_int32 quality)
pixGenerateJp2kData()