Leptonica 1.85.0
Image processing and image analysis suite
Loading...
Searching...
No Matches
pdfio2.c File Reference
#include <string.h>
#include <math.h>
#include "allheaders.h"

Go to the source code of this file.

Macros

#define L_SMALLBUF   256
 
#define L_BIGBUF   2048 /* must be able to hold hex colormap */
 
#define DEBUG_MULTIPAGE   0
 

Functions

static L_COMP_DATA * l_generateJp2kData (const char *fname)
 
static L_COMP_DATA * pixGenerateFlateData (PIX *pixs, l_int32 ascii85flag)
 
static L_COMP_DATA * pixGenerateJpegData (PIX *pixs, l_int32 ascii85flag, l_int32 quality)
 
static L_COMP_DATA * pixGenerateJp2kData (PIX *pixs, l_int32 quality)
 
static L_COMP_DATA * pixGenerateG4Data (PIX *pixs, l_int32 ascii85flag)
 
static l_int32 l_generatePdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
 
static void generateFixedStringsPdf (L_PDF_DATA *lpd)
 
static char * generateEscapeString (const char *str)
 
static void generateMediaboxPdf (L_PDF_DATA *lpd)
 
static l_int32 generatePageStringPdf (L_PDF_DATA *lpd)
 
static l_int32 generateContentStringPdf (L_PDF_DATA *lpd)
 
static l_int32 generatePreXStringsPdf (L_PDF_DATA *lpd)
 
static l_int32 generateColormapStringsPdf (L_PDF_DATA *lpd)
 
static void generateTrailerPdf (L_PDF_DATA *lpd)
 
static char * makeTrailerStringPdf (L_DNA *daloc)
 
static l_int32 generateOutputDataPdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd)
 
static l_int32 parseTrailerPdf (L_BYTEA *bas, L_DNA **pda)
 
static char * generatePagesObjStringPdf (NUMA *napage)
 
static L_BYTEA * substituteObjectNumbers (L_BYTEA *bas, NUMA *na_objs)
 
static L_PDF_DATA * pdfdataCreate (const char *title)
 
static void pdfdataDestroy (L_PDF_DATA **plpd)
 
static L_COMP_DATA * pdfdataGetCid (L_PDF_DATA *lpd, l_int32 index)
 
l_ok pixConvertToPdfData (PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position)
 
l_ok ptraConcatenatePdfToData (L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes)
 
l_ok convertTiffMultipageToPdf (const char *filein, const char *fileout)
 
l_ok l_generateCIDataForPdf (const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid)
 
l_ok l_generateCIData (const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
 
L_COMP_DATA * l_generateFlateDataPdf (const char *fname, PIX *pixs)
 
L_COMP_DATA * l_generateJpegData (const char *fname, l_int32 ascii85flag)
 
L_COMP_DATA * l_generateJpegDataMem (l_uint8 *data, size_t nbytes, l_int32 ascii85flag)
 
L_COMP_DATA * l_generateG4Data (const char *fname, l_int32 ascii85flag)
 
l_ok pixGenerateCIData (PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid)
 
L_COMP_DATA * l_generateFlateData (const char *fname, l_int32 ascii85flag)
 
l_ok cidConvertToPdfData (L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes)
 
void l_CIDataDestroy (L_COMP_DATA **pcid)
 
l_ok getPdfPageCount (const char *fname, l_int32 *pnpages)
 
l_ok getPdfPageSizes (const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
 
l_ok getPdfMediaBoxSizes (const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh)
 
l_ok getPdfRendererResolution (const char *infile, const char *outdir, l_int32 *pres)
 
void l_pdfSetG4ImageMask (l_int32 flag)
 
void l_pdfSetDateAndVersion (l_int32 flag)
 

Variables

static const l_int32 DefaultInputRes = 300
 
static l_int32 var_WRITE_G4_IMAGE_MASK = 1
 
static l_int32 var_WRITE_DATE_AND_VERSION = 1
 

Detailed Description


   Lower-level operations for generating pdf.

    Intermediate function for single page, multi-image conversion
         l_int32              pixConvertToPdfData()

    Intermediate function for generating multipage pdf output
         l_int32              ptraConcatenatePdfToData()

    Convert tiff multipage to pdf file
         l_int32              convertTiffMultipageToPdf()

    Generates the CID, transcoding under some conditions
         l_int32              l_generateCIDataForPdf()
         l_int32              l_generateCIData()

      Lower-level CID generation without transcoding
         L_COMP_DATA         *l_generateFlateDataPdf()
         L_COMP_DATA         *l_generateJpegData()
         L_COMP_DATA         *l_generateJpegDataMem()
         static L_COMP_DATA  *l_generateJp2kData()
         L_COMP_DATA         *l_generateG4Data()

      Lower-level CID generation with transcoding
         l_int32              pixGenerateCIData()
         L_COMP_DATA         *l_generateFlateData()
         static L_COMP_DATA  *pixGenerateFlateData()
         static L_COMP_DATA  *pixGenerateJpegData()
         static L_COMP_DATA  *pixGenerateJp2kData()
         static L_COMP_DATA  *pixGenerateG4Data()

      Other CID operations
         l_int32              cidConvertToPdfData()
         void                 l_CIDataDestroy()

    Helper functions for generating the output pdf string
         static l_int32       l_generatePdf()
         static void          generateFixedStringsPdf()
         static char         *generateEscapeString()
         static void          generateMediaboxPdf()
         static l_int32       generatePageStringPdf()
         static l_int32       generateContentStringPdf()
         static l_int32       generatePreXStringsPdf()
         static l_int32       generateColormapStringsPdf()
         static void          generateTrailerPdf()
         static char         *makeTrailerStringPdf()
         static l_int32       generateOutputDataPdf()

    Helper functions for generating multipage pdf output
         static l_int32       parseTrailerPdf()
         static char         *generatePagesObjStringPdf()
         static L_BYTEA      *substituteObjectNumbers()

    Create/destroy/access pdf data
         static L_PDF_DATA   *pdfdataCreate()
         static void          pdfdataDestroy()
         static L_COMP_DATA  *pdfdataGetCid()

    Find number of pages in a pdf
         l_int32              getPdfPageCount()

    Find widths and heights of pages and media boxes in a pdf
         l_int32              getPdfPageSizes()
         l_int32              getPdfMediaBoxSizes()

    Find effective resolution of images rendered from a pdf
         l_int32              getPdfRendererResolution()

    Set flags for special modes
         void                 l_pdfSetG4ImageMask()
         void                 l_pdfSetDateAndVersion()

Definition in file pdfio2.c.

Macro Definition Documentation

◆ DEBUG_MULTIPAGE

#define DEBUG_MULTIPAGE   0

Definition at line 162 of file pdfio2.c.

◆ L_BIGBUF

#define L_BIGBUF   2048 /* must be able to hold hex colormap */

Definition at line 158 of file pdfio2.c.

◆ L_SMALLBUF

#define L_SMALLBUF   256

Definition at line 157 of file pdfio2.c.

Function Documentation

◆ cidConvertToPdfData()

l_ok cidConvertToPdfData ( L_COMP_DATA * cid,
const char * title,
l_uint8 ** pdata,
size_t * pnbytes )

cidConvertToPdfData()

Parameters
[in] cid: compressed image data
[in] title: [optional] pdf title; can be null
[out] pdata: output pdf data for image
[out] pnbytes: size of output pdf data
Returns
0 if OK, 1 on error
Notes:
     (1) Caller must not destroy the cid.  It is absorbed in the
         lpd and destroyed by this function.

Definition at line 1609 of file pdfio2.c.

References L_Pdf_Data::cida, L_Compressed_Data::h, l_generatePdf(), L_Pdf_Data::n, L_Compressed_Data::res, L_Compressed_Data::w, L_Pdf_Data::wh, and L_Pdf_Data::xy.

◆ convertTiffMultipageToPdf()

l_ok convertTiffMultipageToPdf ( const char * filein,
const char * fileout )

convertTiffMultipageToPdf()

Parameters
[in] filein: (tiff)
[in] fileout: (pdf)
Returns
0 if OK, 1 on error
Notes:
     (1) A multipage tiff file can also be converted to PS, using
         convertTiffMultipageToPS()

Definition at line 491 of file pdfio2.c.

◆ generateColormapStringsPdf()

static l_int32 generateColormapStringsPdf ( L_PDF_DATA * lpd)
static

Definition at line 2092 of file pdfio2.c.

◆ generateContentStringPdf()

static l_int32 generateContentStringPdf ( L_PDF_DATA * lpd)
static

Definition at line 1919 of file pdfio2.c.

◆ generateEscapeString()

static char * generateEscapeString ( const char * str)
static

generateEscapeString()

Parameters
[in] str: input string
Returns
hex escape string, or null on error
Notes:
     (1) If the input string is not ascii, returns null.
     (2) This takes an input ascii string and generates a hex
         ascii output string with 4 bytes out for each byte in.
         The feff code at the beginning tells the pdf interpreter
         that the data is to be interpreted as big-endian, 4 bytes
         at a time.  For ascii, the first two bytes are 0 and the
         last two bytes are less than 0x80.

Definition at line 1809 of file pdfio2.c.

◆ generateFixedStringsPdf()

static void generateFixedStringsPdf ( L_PDF_DATA * lpd)
static

Definition at line 1726 of file pdfio2.c.

◆ generateMediaboxPdf()

static void generateMediaboxPdf ( L_PDF_DATA * lpd)
static

Definition at line 1836 of file pdfio2.c.

◆ generateOutputDataPdf()

static l_int32 generateOutputDataPdf ( l_uint8 ** pdata,
size_t * pnbytes,
L_PDF_DATA * lpd )
static

generateOutputDataPdf()

Parameters
[out] pdata: pdf data array
[out] pnbytes: size of pdf data array
[in] lpd: input data used to make pdf
Returns
0 if OK, 1 on error
Notes:
     (1) Only called from l_generatePdf().  On error, no data is returned.

Definition at line 2225 of file pdfio2.c.

References L_Compressed_Data::datacomp, L_Pdf_Data::id, L_NOCOPY, L_Pdf_Data::n, L_Compressed_Data::nbytescomp, L_Pdf_Data::ncmap, L_Pdf_Data::obj1, L_Pdf_Data::obj2, L_Pdf_Data::obj3, L_Pdf_Data::obj4, L_Pdf_Data::obj5, L_Pdf_Data::objloc, L_Pdf_Data::objsize, L_Pdf_Data::poststream, L_Pdf_Data::sacmap, L_Pdf_Data::saprex, L_Pdf_Data::trailer, and L_Pdf_Data::xrefloc.

Referenced by l_generatePdf().

◆ generatePagesObjStringPdf()

static char * generatePagesObjStringPdf ( NUMA * napage)
static

Definition at line 2397 of file pdfio2.c.

◆ generatePageStringPdf()

static l_int32 generatePageStringPdf ( L_PDF_DATA * lpd)
static

Definition at line 1867 of file pdfio2.c.

◆ generatePreXStringsPdf()

static l_int32 generatePreXStringsPdf ( L_PDF_DATA * lpd)
static

Definition at line 1965 of file pdfio2.c.

◆ generateTrailerPdf()

static void generateTrailerPdf ( L_PDF_DATA * lpd)
static

Definition at line 2131 of file pdfio2.c.

◆ getPdfMediaBoxSizes()

l_ok getPdfMediaBoxSizes ( const char * fname,
NUMA ** pnaw,
NUMA ** pnah,
l_int32 * pmedw,
l_int32 * pmedh )

getPdfMediaBoxSizes()

Parameters
[in] fname: filename
[out] pnaw: [optional] array of mediabox widths
[out] pnah: [optional] array of mediabox heights
[out] pmedw: [optional] median mediabox width
[out] pmedh: [optional] median mediabox height
Returns
0 if OK, 1 on error
Notes:
     (1) Finds the arguments of each instance of '/MediaBox' in the file.
     (2) This will not work on encrypted pdf files or on files where
         the "/MediaBox" field is binary compressed.  Not finding
         the "/MediaBox" field is not an error, but a warning is given.
     (3) This is useful for determining if the media boxes are
         incorrectly assigned, such as assuming the resolution is 72 ppi.
         If that happens and the input to the renderer assumes the
         resolution is 300 ppi, the rendered images will be over 4x too
         large in each dimension.
     (4) An image dimension of 11 inches corresponds to a MediaBox
         parameter of 792.  We consider a value > 850 to be oversized
         and not to be taken literally.

Definition at line 2828 of file pdfio2.c.

Referenced by getPdfRendererResolution().

◆ getPdfPageCount()

l_ok getPdfPageCount ( const char * fname,
l_int32 * pnpages )

getPdfPageCount()

Parameters
[in] fname: filename
[out] pnpages: number of pages
Returns
0 if OK, 1 on error
Notes:
     (1) Looks for the argument of the first instance of /Count in the file.
     (2) This first reads 10000 bytes from the beginning of the file.
         If "/Count" is not in that string, it reads the entire file
         and looks for "/Count".
     (3) This will not work on encrypted pdf files or on files where
         the "/Count" field is binary compressed.  Not finding the
         "/Count" field is not an error, but a warning is given.

Definition at line 2621 of file pdfio2.c.

Referenced by getPdfRendererResolution().

◆ getPdfPageSizes()

l_ok getPdfPageSizes ( const char * fname,
NUMA ** pnaw,
NUMA ** pnah,
l_int32 * pmedw,
l_int32 * pmedh )

getPdfPageSizes()

Parameters
[in] fname: filename
[out] pnaw: [optional] array of page widths
[out] pnah: [optional] array of page heights
[out] pmedw: [optional] median page width
[out] pmedh: [optional] median page height
Returns
0 if OK, 1 on error
Notes:
     (1) Finds the arguments of each instance of '/Width' and '/Height'
         in the file.
     (2) This will not work on encrypted pdf files or on files where
         the "/Width" and "/Height" fields are binary compressed.
         Not finding the "/Width" and "/Height" fields is not an error,
         but a warning is given.

Definition at line 2705 of file pdfio2.c.

◆ getPdfRendererResolution()

l_ok getPdfRendererResolution ( const char * infile,
const char * outdir,
l_int32 * pres )

getPdfRendererResolution()

Parameters
[in] infile: filename of input pdf file
[in] outdir: directory of rendered output images
[out] pres: desired resolution to use with renderer
Returns
0 if OK, 1 on error
Notes:
     (1) Finds the input resolution to pdftoppm that will generate
         images with a maximum dimension of about 3300 pixels,
         representing a full page at 300 ppi.
     (2) It is most important to make sure the renderer does
         not make huge images because of an error in /MediaBox.
         An image dimension of 11 inches corresponds to a MediaBox
         parameter of 792.  We consider a value > 850 to be oversized
         and not to be taken literally.  If the mediaboxes are
         oversized, choose an appropriate lower resolution.
     (3) If the mediaboxes are not accessible, render an image at
         a low known resolution (say, 72 ppi) and based on the image
         size, determine the resolution necessary to make an image
         with 3300 pixels in the largest dimension.
     (4) Requires pdftoppm, so this is disabled on windows for now.
     (5) Requires the ability to call an external program, so it is
         necessary to call setLeptDebugOK(1) before this function.

Definition at line 2940 of file pdfio2.c.

References getPdfMediaBoxSizes(), getPdfPageCount(), and L_NOCOPY.

◆ l_CIDataDestroy()

void l_CIDataDestroy ( L_COMP_DATA ** pcid)

l_CIDataDestroy()

Parameters
[in,out] pcid: will be set to null before returning
Returns
void

Definition at line 1656 of file pdfio2.c.

References L_Compressed_Data::cmapdata85, L_Compressed_Data::cmapdatahex, L_Compressed_Data::data85, and L_Compressed_Data::datacomp.

Referenced by l_generateJp2kData().

◆ l_generateCIData()

l_ok l_generateCIData ( const char * fname,
l_int32 type,
l_int32 quality,
l_int32 ascii85,
L_COMP_DATA ** pcid )

l_generateCIData()

Parameters
[in] fname
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE
[in] quality: for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34)
[in] ascii85: 0 for binary; 1 for ascii85-encoded
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) This can be used for both PostScript and pdf.
     (2) Set ascii85:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (3) This attempts to compress according to the requested type.
         If this can't be done, it falls back to ordinary flate encoding.
     (4) This differs from l_generateCIDataForPdf(), which determines
         the file format and only works for pdf.

Definition at line 625 of file pdfio2.c.

References L_FLATE_ENCODE, L_G4_ENCODE, l_generateFlateData(), l_generateJp2kData(), l_generateJpegData(), L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateG4Data(), pixGenerateJp2kData(), and pixGenerateJpegData().

◆ l_generateCIDataForPdf()

l_ok l_generateCIDataForPdf ( const char * fname,
PIX * pix,
l_int32 quality,
L_COMP_DATA ** pcid )

l_generateCIDataForPdf()

Parameters
[in] fname: [optional] can be null
[in] pix: [optional] can be null
[in] quality: for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34)
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) You must set either filename or pix.
     (2) Given an image file and optionally a pix raster of that data,
         this provides a CID that is compatible with PDF, preferably
         without transcoding.
     (3) The pix is included for efficiency, in case transcoding
         is required and the pix is available to the caller.
     (4) We don't try to open files named "stdin" or "-" for Tesseract
         compatibility reasons. We may remove this restriction
         in the future.
     (5) Note that tiff-g4 must be transcoded to properly handle byte
         order and perhaps photometry (e.g., min-is-black).  For a
         multipage tiff file, data will only be extracted from the
         first page, so this should not be invoked.

Definition at line 543 of file pdfio2.c.

References l_generateFlateDataPdf(), l_generateJp2kData(), l_generateJpegData(), and pixGenerateCIData().

◆ l_generateFlateData()

L_COMP_DATA * l_generateFlateData ( const char * fname,
l_int32 ascii85flag )

l_generateFlateData()

Parameters
[in] fname
[in] ascii85flag: 0 for gzipped; 1 for ascii85-encoded gzipped
Returns
cid flate compressed image data, or NULL on error
Notes:
     (1) The input image is converted to one of these 4 types:
          ~ 1 bpp
          ~ 8 bpp, no colormap
          ~ 8 bpp, colormap
          ~ 32 bpp rgb
     (2) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (3) Always transcodes (i.e., first decodes the png file)

Definition at line 1308 of file pdfio2.c.

References pixGenerateFlateData().

Referenced by l_generateCIData(), and l_generateFlateDataPdf().

◆ l_generateFlateDataPdf()

L_COMP_DATA * l_generateFlateDataPdf ( const char * fname,
PIX * pixs )

l_generateFlateDataPdf()

Parameters
[in] fname: preferably png
[in] pixs: [optional] can be null
Returns
cid containing png data, or NULL on error
Notes:
     (1) If you hand this a png file, you are going to get
         png predictors embedded in the flate data. So it has
         come to this. http://xkcd.com/1022/
     (2) Exception: if the png is interlaced or if it is RGBA,
         it will be transcoded.
     (3) If transcoding is required, this will not have to read from
         file if a pix is input.

Definition at line 727 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::cmapdatahex, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_FLATE_ENCODE, l_generateFlateData(), L_Compressed_Data::nbytescomp, L_Compressed_Data::ncolors, pixGenerateFlateData(), L_Compressed_Data::predictor, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by l_generateCIDataForPdf().

◆ l_generateG4Data()

L_COMP_DATA * l_generateG4Data ( const char * fname,
l_int32 ascii85flag )

l_generateG4Data()

Parameters
[in] fname: of g4 compressed file
[in] ascii85flag: 0 for g4 compressed; 1 for ascii85-encoded g4
Returns
cid g4 compressed image data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) This does not work for multipage tiff files.

Definition at line 1114 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_G4_ENCODE, L_Compressed_Data::minisblack, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by pixGenerateG4Data().

◆ l_generateJp2kData()

static L_COMP_DATA * l_generateJp2kData ( const char * fname)
static

l_generateJp2kData()

Parameters
[in] fname: of jp2k file
Returns
cid containing jp2k data, or NULL on error
Notes:
     (1) This is only called after the file is verified to be jp2k.

Definition at line 1062 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::datacomp, L_Compressed_Data::h, l_CIDataDestroy(), L_JP2K_ENCODE, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by l_generateCIData(), l_generateCIDataForPdf(), and pixGenerateJp2kData().

◆ l_generateJpegData()

L_COMP_DATA * l_generateJpegData ( const char * fname,
l_int32 ascii85flag )

l_generateJpegData()

Parameters
[in] fname: of jpeg file
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
Returns
cid containing jpeg data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) Most of this function is repeated in l_generateJpegDataMem(),
         which is required in pixacompFastConvertToPdfData().

Definition at line 925 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_JPEG_ENCODE, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by l_generateCIData(), l_generateCIDataForPdf(), and pixGenerateJpegData().

◆ l_generateJpegDataMem()

L_COMP_DATA * l_generateJpegDataMem ( l_uint8 * data,
size_t nbytes,
l_int32 ascii85flag )

l_generateJpegDataMem()

Parameters
[in] data: of jpeg-encoded file
[in] nbytes: size of jpeg-encoded file
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
Returns
cid containing jpeg data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)

Definition at line 1002 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_JPEG_ENCODE, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

◆ l_generatePdf()

static l_int32 l_generatePdf ( l_uint8 ** pdata,
size_t * pnbytes,
L_PDF_DATA * lpd )
static

l_generatePdf()

Parameters
[out] pdata: pdf array
[out] pnbytes: number of bytes in pdf array
[in] lpd: all the required input image data
Returns
0 if OK, 1 on error
Notes:
     (1) On error, no data is returned.
     (2) The objects are:
           1: Catalog
           2: Info
           3: Pages
           4: Page
           5: Contents  (rendering command)
           6 to 6+n-1: n XObjects
           6+n to 6+n+m-1: m colormaps

Definition at line 1701 of file pdfio2.c.

References generateOutputDataPdf().

Referenced by cidConvertToPdfData(), and pixConvertToPdfData().

◆ l_pdfSetDateAndVersion()

void l_pdfSetDateAndVersion ( l_int32 flag)

l_pdfSetDateAndVersion()

Parameters
[in] flag: 1 for writing date/time and leptonica version; 0 for omitting this from the metadata
Returns
void
Notes:
     (1) The default is for writing this data.  For regression tests
         that compare output against golden files, it is useful to omit.

Definition at line 3051 of file pdfio2.c.

◆ l_pdfSetG4ImageMask()

void l_pdfSetG4ImageMask ( l_int32 flag)

l_pdfSetG4ImageMask()

Parameters
[in] flag: 1 for writing g4 data as fg only through a mask; 0 for writing fg and bg
Returns
void
Notes:
     (1) The default is for writing only the fg (through the mask).
         That way when you write a 1 bpp image, the bg is transparent,
         so any previously written image remains visible behind it.

Definition at line 3031 of file pdfio2.c.

◆ makeTrailerStringPdf()

static char * makeTrailerStringPdf ( L_DNA * daloc)
static

Definition at line 2172 of file pdfio2.c.

◆ parseTrailerPdf()

static l_int32 parseTrailerPdf ( L_BYTEA * bas,
L_DNA ** pda )
static

parseTrailerPdf()

Parameters
[in] bas: lba of a pdf file
[out] pda: byte locations of the beginning of each object
Returns
0 if OK, 1 on error

Definition at line 2299 of file pdfio2.c.

References L_NOCOPY.

Referenced by ptraConcatenatePdfToData().

◆ pdfdataCreate()

static L_PDF_DATA * pdfdataCreate ( const char * title)
static

Definition at line 2529 of file pdfio2.c.

◆ pdfdataDestroy()

static void pdfdataDestroy ( L_PDF_DATA ** plpd)
static

Definition at line 2546 of file pdfio2.c.

◆ pdfdataGetCid()

static L_COMP_DATA * pdfdataGetCid ( L_PDF_DATA * lpd,
l_int32 index )
static

Definition at line 2587 of file pdfio2.c.

◆ pixConvertToPdfData()

l_ok pixConvertToPdfData ( PIX * pix,
l_int32 type,
l_int32 quality,
l_uint8 ** pdata,
size_t * pnbytes,
l_int32 x,
l_int32 y,
l_int32 res,
const char * title,
L_PDF_DATA ** plpd,
l_int32 position )

pixConvertToPdfData()

Parameters
[in] pix: all depths; cmap OK
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE
[in] quality: for jpeg: 1-100; 0 for default (75); for jp2k: 27-45; 0 for default (34)
[out] pdata: pdf array
[out] pnbytes: number of bytes in pdf array
[in] x,y: location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page
[in] res: override the resolution of the input image, in ppi; use 0 to respect resolution embedded in the input
[in] title: [optional] pdf title; can be null
[in,out] plpd: ptr to lpd; created on the first invocation and returned until last image is processed
[in] position: in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE
Returns
0 if OK, 1 on error
Notes:
     (1) If res == 0 and the input resolution field from the pix is 0,
         this will use DefaultInputRes.
     (2) This only writes data if it is the last image to be
         written on the page.
     (3) See comments in convertToPdf().

Definition at line 201 of file pdfio2.c.

References L_Pdf_Data::cida, L_Compressed_Data::h, L_FIRST_IMAGE, L_FLATE_ENCODE, L_G4_ENCODE, l_generatePdf(), L_JP2K_ENCODE, L_JPEG_ENCODE, L_LAST_IMAGE, L_Pdf_Data::n, pixGenerateCIData(), L_Compressed_Data::res, L_Compressed_Data::w, L_Pdf_Data::wh, and L_Pdf_Data::xy.

◆ pixGenerateCIData()

l_ok pixGenerateCIData ( PIX * pixs,
l_int32 type,
l_int32 quality,
l_int32 ascii85,
L_COMP_DATA ** pcid )

pixGenerateCIData()

Parameters
[in] pixs: 8 or 32 bpp, no colormap
[in] type: L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or L_JP2K_ENCODE
[in] quality: for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34)
[in] ascii85: 0 for binary; 1 for ascii85-encoded
[out] pcid: compressed data
Returns
0 if OK, 1 on error
Notes:
     (1) Set ascii85:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) Do not accept images with an aspect ratio greater than 10.

Definition at line 1206 of file pdfio2.c.

References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateFlateData(), pixGenerateG4Data(), pixGenerateJp2kData(), and pixGenerateJpegData().

Referenced by l_generateCIDataForPdf(), and pixConvertToPdfData().

◆ pixGenerateFlateData()

static L_COMP_DATA * pixGenerateFlateData ( PIX * pixs,
l_int32 ascii85flag )
static

pixGenerateFlateData()

Parameters
[in] pixs
[in] ascii85flag: 0 for gzipped; 1 for ascii85-encoded gzipped
Returns
cid flate compressed image data, or NULL on error
Notes:
    (1) If called with an RGBA pix (spp == 4), the alpha channel
        will be removed, projecting a white background through
        any transparency.
    (2) If called with a colormapped pix, any transparency in the
        alpha component in the colormap will be ignored, as it is
        for all leptonica operations on colormapped pix.

Definition at line 1343 of file pdfio2.c.

References L_Compressed_Data::bps, L_Compressed_Data::cmapdata85, L_Compressed_Data::cmapdatahex, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_FLATE_ENCODE, L_Compressed_Data::nbytes, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::ncolors, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.

Referenced by l_generateFlateData(), l_generateFlateDataPdf(), and pixGenerateCIData().

◆ pixGenerateG4Data()

static L_COMP_DATA * pixGenerateG4Data ( PIX * pixs,
l_int32 ascii85flag )
static

pixGenerateG4Data()

Parameters
[in] pixs: 1 bpp, no colormap
[in] ascii85flag: 0 for g4 compressed; 1 for ascii85-encoded g4
Returns
cid g4 compressed image data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)

Definition at line 1565 of file pdfio2.c.

References l_generateG4Data().

Referenced by l_generateCIData(), and pixGenerateCIData().

◆ pixGenerateJp2kData()

static L_COMP_DATA * pixGenerateJp2kData ( PIX * pixs,
l_int32 quality )
static

pixGenerateJp2kData()

Parameters
[in] pixs: 8 or 32 bpp, no colormap
[in] quality: 0 for default, which is 34
Returns
cid jp2k compressed data, or NULL on error
Notes:
     (1) The quality can be set between 27 (very poor) and 45
         (nearly perfect).  Use 0 for default (34). Use 100 for lossless,
         but this is very expensive and not recommended.

Definition at line 1519 of file pdfio2.c.

References l_generateJp2kData().

Referenced by l_generateCIData(), and pixGenerateCIData().

◆ pixGenerateJpegData()

static L_COMP_DATA * pixGenerateJpegData ( PIX * pixs,
l_int32 ascii85flag,
l_int32 quality )
static

pixGenerateJpegData()

Parameters
[in] pixs: 8, 16 or 32 bpp, no colormap
[in] ascii85flag: 0 for jpeg; 1 for ascii85-encoded jpeg
[in] quality: 0 for default, which is 75
Returns
cid jpeg compressed data, or NULL on error
Notes:
     (1) Set ascii85flag:
          ~ 0 for binary data (PDF only)
          ~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
     (2) If 16 bpp, convert first to 8 bpp, using the MSB

Definition at line 1471 of file pdfio2.c.

References l_generateJpegData().

Referenced by l_generateCIData(), and pixGenerateCIData().

◆ ptraConcatenatePdfToData()

l_ok ptraConcatenatePdfToData ( L_PTRA * pa_data,
SARRAY * sa,
l_uint8 ** pdata,
size_t * pnbytes )

ptraConcatenatePdfToData()

Parameters
[in] pa_data: ptra array of pdf strings, each for a single-page pdf file
[in] sa: [optional] string array of pathnames for input pdf files; can be null
[out] pdata: concatenated pdf data in memory
[out] pnbytes: number of bytes in pdf data
Returns
0 if OK, 1 on error
Notes:
     (1) This only works with leptonica-formatted single-page pdf files.
         pdf files generated by other programs will have unpredictable
         (and usually bad) results.  The requirements for each pdf file:
           (a) The Catalog and Info objects are the first two.
           (b) Object 3 is Pages
           (c) Object 4 is Page
           (d) The remaining objects are Contents, XObjects, and ColorSpace
     (2) We remove trailers from each page, and append the full trailer
         for all pages at the end.
     (3) For all but the first file, remove the ID and the first 3
         objects (catalog, info, pages), so that each subsequent
         file has only objects of these classes:
             Page, Contents, XObject, ColorSpace (Indexed RGB).
         For those objects, we substitute these refs to objects
         in the local file:
             Page:  Parent(object 3), Contents, XObject(typically multiple)
             XObject:  [ColorSpace if indexed]
         The Pages object on the first page (object 3) has a Kids array
         of references to all the Page objects, with a Count equal
         to the number of pages.  Each Page object refers back to
         this parent.

Definition at line 329 of file pdfio2.c.

References L_CLONE, L_INSERT, L_NO_COMPACTION, L_NOCOPY, parseTrailerPdf(), and substituteObjectNumbers().

◆ substituteObjectNumbers()

static L_BYTEA * substituteObjectNumbers ( L_BYTEA * bas,
NUMA * na_objs )
static

substituteObjectNumbers()

Parameters
[in] bas: lba of a pdf object
[in] na_objs: object number mapping array
Returns
bad: lba of rewritten pdf for the object
Notes:
     (1) Interpret the first set of bytes as the object number,
         map to the new number, and write it out.
     (2) Find all occurrences of this 4-byte sequence: " 0 R"
     (3) Find the location and value of the integer preceding this,
         and map it to the new value.
     (4) Rewrite the object with new object numbers.

Definition at line 2450 of file pdfio2.c.

Referenced by ptraConcatenatePdfToData().

Variable Documentation

◆ DefaultInputRes

const l_int32 DefaultInputRes = 300
static

Definition at line 118 of file pdfio2.c.

◆ var_WRITE_DATE_AND_VERSION

l_int32 var_WRITE_DATE_AND_VERSION = 1
static

Definition at line 155 of file pdfio2.c.

◆ var_WRITE_G4_IMAGE_MASK

l_int32 var_WRITE_G4_IMAGE_MASK = 1
static

Definition at line 153 of file pdfio2.c.