|
| static L_COMP_DATA * | l_generateJp2kData (const char *fname) |
| |
| static L_COMP_DATA * | pixGenerateFlateData (PIX *pixs, l_int32 ascii85flag) |
| |
| static L_COMP_DATA * | pixGenerateJpegData (PIX *pixs, l_int32 ascii85flag, l_int32 quality) |
| |
| static L_COMP_DATA * | pixGenerateJp2kData (PIX *pixs, l_int32 quality) |
| |
| static L_COMP_DATA * | pixGenerateG4Data (PIX *pixs, l_int32 ascii85flag) |
| |
| static l_int32 | l_generatePdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd) |
| |
| static void | generateFixedStringsPdf (L_PDF_DATA *lpd) |
| |
| static char * | generateEscapeString (const char *str) |
| |
| static void | generateMediaboxPdf (L_PDF_DATA *lpd) |
| |
| static l_int32 | generatePageStringPdf (L_PDF_DATA *lpd) |
| |
| static l_int32 | generateContentStringPdf (L_PDF_DATA *lpd) |
| |
| static l_int32 | generatePreXStringsPdf (L_PDF_DATA *lpd) |
| |
| static l_int32 | generateColormapStringsPdf (L_PDF_DATA *lpd) |
| |
| static void | generateTrailerPdf (L_PDF_DATA *lpd) |
| |
| static char * | makeTrailerStringPdf (L_DNA *daloc) |
| |
| static l_int32 | generateOutputDataPdf (l_uint8 **pdata, size_t *pnbytes, L_PDF_DATA *lpd) |
| |
| static l_int32 | parseTrailerPdf (L_BYTEA *bas, L_DNA **pda) |
| |
| static char * | generatePagesObjStringPdf (NUMA *napage) |
| |
| static L_BYTEA * | substituteObjectNumbers (L_BYTEA *bas, NUMA *na_objs) |
| |
| static L_PDF_DATA * | pdfdataCreate (const char *title) |
| |
| static void | pdfdataDestroy (L_PDF_DATA **plpd) |
| |
| static L_COMP_DATA * | pdfdataGetCid (L_PDF_DATA *lpd, l_int32 index) |
| |
| l_ok | pixConvertToPdfData (PIX *pix, l_int32 type, l_int32 quality, l_uint8 **pdata, size_t *pnbytes, l_int32 x, l_int32 y, l_int32 res, const char *title, L_PDF_DATA **plpd, l_int32 position) |
| |
| l_ok | ptraConcatenatePdfToData (L_PTRA *pa_data, SARRAY *sa, l_uint8 **pdata, size_t *pnbytes) |
| |
| l_ok | convertTiffMultipageToPdf (const char *filein, const char *fileout) |
| |
| l_ok | l_generateCIDataForPdf (const char *fname, PIX *pix, l_int32 quality, L_COMP_DATA **pcid) |
| |
| l_ok | l_generateCIData (const char *fname, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid) |
| |
| L_COMP_DATA * | l_generateFlateDataPdf (const char *fname, PIX *pixs) |
| |
| L_COMP_DATA * | l_generateJpegData (const char *fname, l_int32 ascii85flag) |
| |
| L_COMP_DATA * | l_generateJpegDataMem (l_uint8 *data, size_t nbytes, l_int32 ascii85flag) |
| |
| L_COMP_DATA * | l_generateG4Data (const char *fname, l_int32 ascii85flag) |
| |
| l_ok | pixGenerateCIData (PIX *pixs, l_int32 type, l_int32 quality, l_int32 ascii85, L_COMP_DATA **pcid) |
| |
| L_COMP_DATA * | l_generateFlateData (const char *fname, l_int32 ascii85flag) |
| |
| l_ok | cidConvertToPdfData (L_COMP_DATA *cid, const char *title, l_uint8 **pdata, size_t *pnbytes) |
| |
| void | l_CIDataDestroy (L_COMP_DATA **pcid) |
| |
| l_ok | getPdfPageCount (const char *fname, l_int32 *pnpages) |
| |
| l_ok | getPdfPageSizes (const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh) |
| |
| l_ok | getPdfMediaBoxSizes (const char *fname, NUMA **pnaw, NUMA **pnah, l_int32 *pmedw, l_int32 *pmedh) |
| |
| l_ok | getPdfRendererResolution (const char *infile, const char *outdir, l_int32 *pres) |
| |
| void | l_pdfSetG4ImageMask (l_int32 flag) |
| |
| void | l_pdfSetDateAndVersion (l_int32 flag) |
| |
Lower-level operations for generating pdf.
Intermediate function for single page, multi-image conversion
l_int32 pixConvertToPdfData()
Intermediate function for generating multipage pdf output
l_int32 ptraConcatenatePdfToData()
Convert tiff multipage to pdf file
l_int32 convertTiffMultipageToPdf()
Generates the CID, transcoding under some conditions
l_int32 l_generateCIDataForPdf()
l_int32 l_generateCIData()
Lower-level CID generation without transcoding
L_COMP_DATA *l_generateFlateDataPdf()
L_COMP_DATA *l_generateJpegData()
L_COMP_DATA *l_generateJpegDataMem()
static L_COMP_DATA *l_generateJp2kData()
L_COMP_DATA *l_generateG4Data()
Lower-level CID generation with transcoding
l_int32 pixGenerateCIData()
L_COMP_DATA *l_generateFlateData()
static L_COMP_DATA *pixGenerateFlateData()
static L_COMP_DATA *pixGenerateJpegData()
static L_COMP_DATA *pixGenerateJp2kData()
static L_COMP_DATA *pixGenerateG4Data()
Other CID operations
l_int32 cidConvertToPdfData()
void l_CIDataDestroy()
Helper functions for generating the output pdf string
static l_int32 l_generatePdf()
static void generateFixedStringsPdf()
static char *generateEscapeString()
static void generateMediaboxPdf()
static l_int32 generatePageStringPdf()
static l_int32 generateContentStringPdf()
static l_int32 generatePreXStringsPdf()
static l_int32 generateColormapStringsPdf()
static void generateTrailerPdf()
static char *makeTrailerStringPdf()
static l_int32 generateOutputDataPdf()
Helper functions for generating multipage pdf output
static l_int32 parseTrailerPdf()
static char *generatePagesObjStringPdf()
static L_BYTEA *substituteObjectNumbers()
Create/destroy/access pdf data
static L_PDF_DATA *pdfdataCreate()
static void pdfdataDestroy()
static L_COMP_DATA *pdfdataGetCid()
Find number of pages in a pdf
l_int32 getPdfPageCount()
Find widths and heights of pages and media boxes in a pdf
l_int32 getPdfPageSizes()
l_int32 getPdfMediaBoxSizes()
Find effective resolution of images rendered from a pdf
l_int32 getPdfRendererResolution()
Set flags for special modes
void l_pdfSetG4ImageMask()
void l_pdfSetDateAndVersion()
Definition in file pdfio2.c.
| l_ok getPdfMediaBoxSizes |
( |
const char * | fname, |
|
|
NUMA ** | pnaw, |
|
|
NUMA ** | pnah, |
|
|
l_int32 * | pmedw, |
|
|
l_int32 * | pmedh ) |
getPdfMediaBoxSizes()
- Parameters
-
| [in] | fname | filename |
| [out] | pnaw | [optional] array of mediabox widths |
| [out] | pnah | [optional] array of mediabox heights |
| [out] | pmedw | [optional] median mediabox width |
| [out] | pmedh | [optional] median mediabox height |
- Returns
- 0 if OK, 1 on error
Notes:
(1) Finds the arguments of each instance of '/MediaBox' in the file.
(2) This will not work on encrypted pdf files or on files where
the "/MediaBox" field is binary compressed. Not finding
the "/MediaBox" field is not an error, but a warning is given.
(3) This is useful for determining if the media boxes are
incorrectly assigned, such as assuming the resolution is 72 ppi.
If that happens and the input to the renderer assumes the
resolution is 300 ppi, the rendered images will be over 4x too
large in each dimension.
(4) An image dimension of 11 inches corresponds to a MediaBox
parameter of 792. We consider a value > 850 to be oversized
and not to be taken literally.
Definition at line 2828 of file pdfio2.c.
Referenced by getPdfRendererResolution().
| l_ok getPdfRendererResolution |
( |
const char * | infile, |
|
|
const char * | outdir, |
|
|
l_int32 * | pres ) |
getPdfRendererResolution()
- Parameters
-
| [in] | infile | filename of input pdf file |
| [in] | outdir | directory of rendered output images |
| [out] | pres | desired resolution to use with renderer |
- Returns
- 0 if OK, 1 on error
Notes:
(1) Finds the input resolution to pdftoppm that will generate
images with a maximum dimension of about 3300 pixels,
representing a full page at 300 ppi.
(2) It is most important to make sure the renderer does
not make huge images because of an error in /MediaBox.
An image dimension of 11 inches corresponds to a MediaBox
parameter of 792. We consider a value > 850 to be oversized
and not to be taken literally. If the mediaboxes are
oversized, choose an appropriate lower resolution.
(3) If the mediaboxes are not accessible, render an image at
a low known resolution (say, 72 ppi) and based on the image
size, determine the resolution necessary to make an image
with 3300 pixels in the largest dimension.
(4) Requires pdftoppm, so this is disabled on windows for now.
(5) Requires the ability to call an external program, so it is
necessary to call setLeptDebugOK(1) before this function.
Definition at line 2940 of file pdfio2.c.
References getPdfMediaBoxSizes(), getPdfPageCount(), and L_NOCOPY.
| l_ok l_generateCIData |
( |
const char * | fname, |
|
|
l_int32 | type, |
|
|
l_int32 | quality, |
|
|
l_int32 | ascii85, |
|
|
L_COMP_DATA ** | pcid ) |
l_generateCIData()
- Parameters
-
| [in] | fname | |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE |
| [in] | quality | for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34) |
| [in] | ascii85 | 0 for binary; 1 for ascii85-encoded |
| [out] | pcid | compressed data |
- Returns
- 0 if OK, 1 on error
Notes:
(1) This can be used for both PostScript and pdf.
(2) Set ascii85:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(3) This attempts to compress according to the requested type.
If this can't be done, it falls back to ordinary flate encoding.
(4) This differs from l_generateCIDataForPdf(), which determines
the file format and only works for pdf.
Definition at line 625 of file pdfio2.c.
References L_FLATE_ENCODE, L_G4_ENCODE, l_generateFlateData(), l_generateJp2kData(), l_generateJpegData(), L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateG4Data(), pixGenerateJp2kData(), and pixGenerateJpegData().
| l_ok l_generateCIDataForPdf |
( |
const char * | fname, |
|
|
PIX * | pix, |
|
|
l_int32 | quality, |
|
|
L_COMP_DATA ** | pcid ) |
l_generateCIDataForPdf()
- Parameters
-
| [in] | fname | [optional] can be null |
| [in] | pix | [optional] can be null |
| [in] | quality | for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34) |
| [out] | pcid | compressed data |
- Returns
- 0 if OK, 1 on error
Notes:
(1) You must set either filename or pix.
(2) Given an image file and optionally a pix raster of that data,
this provides a CID that is compatible with PDF, preferably
without transcoding.
(3) The pix is included for efficiency, in case transcoding
is required and the pix is available to the caller.
(4) We don't try to open files named "stdin" or "-" for Tesseract
compatibility reasons. We may remove this restriction
in the future.
(5) Note that tiff-g4 must be transcoded to properly handle byte
order and perhaps photometry (e.g., min-is-black). For a
multipage tiff file, data will only be extracted from the
first page, so this should not be invoked.
Definition at line 543 of file pdfio2.c.
References l_generateFlateDataPdf(), l_generateJp2kData(), l_generateJpegData(), and pixGenerateCIData().
| L_COMP_DATA * l_generateFlateData |
( |
const char * | fname, |
|
|
l_int32 | ascii85flag ) |
l_generateFlateData()
- Parameters
-
| [in] | fname | |
| [in] | ascii85flag | 0 for gzipped; 1 for ascii85-encoded gzipped |
- Returns
- cid flate compressed image data, or NULL on error
Notes:
(1) The input image is converted to one of these 4 types:
~ 1 bpp
~ 8 bpp, no colormap
~ 8 bpp, colormap
~ 32 bpp rgb
(2) Set ascii85flag:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(3) Always transcodes (i.e., first decodes the png file)
Definition at line 1308 of file pdfio2.c.
References pixGenerateFlateData().
Referenced by l_generateCIData(), and l_generateFlateDataPdf().
| L_COMP_DATA * l_generateFlateDataPdf |
( |
const char * | fname, |
|
|
PIX * | pixs ) |
l_generateFlateDataPdf()
- Parameters
-
| [in] | fname | preferably png |
| [in] | pixs | [optional] can be null |
- Returns
- cid containing png data, or NULL on error
Notes:
(1) If you hand this a png file, you are going to get
png predictors embedded in the flate data. So it has
come to this. http://xkcd.com/1022/
(2) Exception: if the png is interlaced or if it is RGBA,
it will be transcoded.
(3) If transcoding is required, this will not have to read from
file if a pix is input.
Definition at line 727 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::cmapdatahex, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_FLATE_ENCODE, l_generateFlateData(), L_Compressed_Data::nbytescomp, L_Compressed_Data::ncolors, pixGenerateFlateData(), L_Compressed_Data::predictor, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by l_generateCIDataForPdf().
| L_COMP_DATA * l_generateG4Data |
( |
const char * | fname, |
|
|
l_int32 | ascii85flag ) |
l_generateG4Data()
- Parameters
-
| [in] | fname | of g4 compressed file |
| [in] | ascii85flag | 0 for g4 compressed; 1 for ascii85-encoded g4 |
- Returns
- cid g4 compressed image data, or NULL on error
Notes:
(1) Set ascii85flag:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(2) This does not work for multipage tiff files.
Definition at line 1114 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_G4_ENCODE, L_Compressed_Data::minisblack, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by pixGenerateG4Data().
| L_COMP_DATA * l_generateJpegData |
( |
const char * | fname, |
|
|
l_int32 | ascii85flag ) |
l_generateJpegData()
- Parameters
-
| [in] | fname | of jpeg file |
| [in] | ascii85flag | 0 for jpeg; 1 for ascii85-encoded jpeg |
- Returns
- cid containing jpeg data, or NULL on error
Notes:
(1) Set ascii85flag:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(2) Most of this function is repeated in l_generateJpegDataMem(),
which is required in pixacompFastConvertToPdfData().
Definition at line 925 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_JPEG_ENCODE, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by l_generateCIData(), l_generateCIDataForPdf(), and pixGenerateJpegData().
| l_ok pixConvertToPdfData |
( |
PIX * | pix, |
|
|
l_int32 | type, |
|
|
l_int32 | quality, |
|
|
l_uint8 ** | pdata, |
|
|
size_t * | pnbytes, |
|
|
l_int32 | x, |
|
|
l_int32 | y, |
|
|
l_int32 | res, |
|
|
const char * | title, |
|
|
L_PDF_DATA ** | plpd, |
|
|
l_int32 | position ) |
pixConvertToPdfData()
- Parameters
-
| [in] | pix | all depths; cmap OK |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE, L_JP2K_ENCODE |
| [in] | quality | for jpeg: 1-100; 0 for default (75); for jp2k: 27-45; 0 for default (34) |
| [out] | pdata | pdf array |
| [out] | pnbytes | number of bytes in pdf array |
| [in] | x,y | location of lower-left corner of image, in pixels, relative to the PostScript origin (0,0) at the lower-left corner of the page |
| [in] | res | override the resolution of the input image, in ppi; use 0 to respect resolution embedded in the input |
| [in] | title | [optional] pdf title; can be null |
| [in,out] | plpd | ptr to lpd; created on the first invocation and returned until last image is processed |
| [in] | position | in image sequence: L_FIRST_IMAGE, L_NEXT_IMAGE, L_LAST_IMAGE |
- Returns
- 0 if OK, 1 on error
Notes:
(1) If res == 0 and the input resolution field from the pix is 0,
this will use DefaultInputRes.
(2) This only writes data if it is the last image to be
written on the page.
(3) See comments in convertToPdf().
Definition at line 201 of file pdfio2.c.
References L_Pdf_Data::cida, L_Compressed_Data::h, L_FIRST_IMAGE, L_FLATE_ENCODE, L_G4_ENCODE, l_generatePdf(), L_JP2K_ENCODE, L_JPEG_ENCODE, L_LAST_IMAGE, L_Pdf_Data::n, pixGenerateCIData(), L_Compressed_Data::res, L_Compressed_Data::w, L_Pdf_Data::wh, and L_Pdf_Data::xy.
| l_ok pixGenerateCIData |
( |
PIX * | pixs, |
|
|
l_int32 | type, |
|
|
l_int32 | quality, |
|
|
l_int32 | ascii85, |
|
|
L_COMP_DATA ** | pcid ) |
pixGenerateCIData()
- Parameters
-
| [in] | pixs | 8 or 32 bpp, no colormap |
| [in] | type | L_G4_ENCODE, L_JPEG_ENCODE, L_FLATE_ENCODE or L_JP2K_ENCODE |
| [in] | quality | for jpeg if transcoded: 1-100; 0 for default (75); for jp2k if transcoded: 27-45; 0 for default (34) |
| [in] | ascii85 | 0 for binary; 1 for ascii85-encoded |
| [out] | pcid | compressed data |
- Returns
- 0 if OK, 1 on error
Notes:
(1) Set ascii85:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(2) Do not accept images with an aspect ratio greater than 10.
Definition at line 1206 of file pdfio2.c.
References L_FLATE_ENCODE, L_G4_ENCODE, L_JP2K_ENCODE, L_JPEG_ENCODE, pixGenerateFlateData(), pixGenerateG4Data(), pixGenerateJp2kData(), and pixGenerateJpegData().
Referenced by l_generateCIDataForPdf(), and pixConvertToPdfData().
| static L_COMP_DATA * pixGenerateFlateData |
( |
PIX * | pixs, |
|
|
l_int32 | ascii85flag ) |
|
static |
pixGenerateFlateData()
- Parameters
-
| [in] | pixs | |
| [in] | ascii85flag | 0 for gzipped; 1 for ascii85-encoded gzipped |
- Returns
- cid flate compressed image data, or NULL on error
Notes:
(1) If called with an RGBA pix (spp == 4), the alpha channel
will be removed, projecting a white background through
any transparency.
(2) If called with a colormapped pix, any transparency in the
alpha component in the colormap will be ignored, as it is
for all leptonica operations on colormapped pix.
Definition at line 1343 of file pdfio2.c.
References L_Compressed_Data::bps, L_Compressed_Data::cmapdata85, L_Compressed_Data::cmapdatahex, L_Compressed_Data::data85, L_Compressed_Data::datacomp, L_Compressed_Data::h, L_FLATE_ENCODE, L_Compressed_Data::nbytes, L_Compressed_Data::nbytes85, L_Compressed_Data::nbytescomp, L_Compressed_Data::ncolors, L_Compressed_Data::res, L_Compressed_Data::spp, L_Compressed_Data::type, and L_Compressed_Data::w.
Referenced by l_generateFlateData(), l_generateFlateDataPdf(), and pixGenerateCIData().
| static L_COMP_DATA * pixGenerateJpegData |
( |
PIX * | pixs, |
|
|
l_int32 | ascii85flag, |
|
|
l_int32 | quality ) |
|
static |
pixGenerateJpegData()
- Parameters
-
| [in] | pixs | 8, 16 or 32 bpp, no colormap |
| [in] | ascii85flag | 0 for jpeg; 1 for ascii85-encoded jpeg |
| [in] | quality | 0 for default, which is 75 |
- Returns
- cid jpeg compressed data, or NULL on error
Notes:
(1) Set ascii85flag:
~ 0 for binary data (PDF only)
~ 1 for ascii85 (5 for 4) encoded binary data (PostScript only)
(2) If 16 bpp, convert first to 8 bpp, using the MSB
Definition at line 1471 of file pdfio2.c.
References l_generateJpegData().
Referenced by l_generateCIData(), and pixGenerateCIData().
| l_ok ptraConcatenatePdfToData |
( |
L_PTRA * | pa_data, |
|
|
SARRAY * | sa, |
|
|
l_uint8 ** | pdata, |
|
|
size_t * | pnbytes ) |
ptraConcatenatePdfToData()
- Parameters
-
| [in] | pa_data | ptra array of pdf strings, each for a single-page pdf file |
| [in] | sa | [optional] string array of pathnames for input pdf files; can be null |
| [out] | pdata | concatenated pdf data in memory |
| [out] | pnbytes | number of bytes in pdf data |
- Returns
- 0 if OK, 1 on error
Notes:
(1) This only works with leptonica-formatted single-page pdf files.
pdf files generated by other programs will have unpredictable
(and usually bad) results. The requirements for each pdf file:
(a) The Catalog and Info objects are the first two.
(b) Object 3 is Pages
(c) Object 4 is Page
(d) The remaining objects are Contents, XObjects, and ColorSpace
(2) We remove trailers from each page, and append the full trailer
for all pages at the end.
(3) For all but the first file, remove the ID and the first 3
objects (catalog, info, pages), so that each subsequent
file has only objects of these classes:
Page, Contents, XObject, ColorSpace (Indexed RGB).
For those objects, we substitute these refs to objects
in the local file:
Page: Parent(object 3), Contents, XObject(typically multiple)
XObject: [ColorSpace if indexed]
The Pages object on the first page (object 3) has a Kids array
of references to all the Page objects, with a Count equal
to the number of pages. Each Page object refers back to
this parent.
Definition at line 329 of file pdfio2.c.
References L_CLONE, L_INSERT, L_NO_COMPACTION, L_NOCOPY, parseTrailerPdf(), and substituteObjectNumbers().
substituteObjectNumbers()
- Parameters
-
| [in] | bas | lba of a pdf object |
| [in] | na_objs | object number mapping array |
- Returns
- bad lba of rewritten pdf for the object
Notes:
(1) Interpret the first set of bytes as the object number,
map to the new number, and write it out.
(2) Find all occurrences of this 4-byte sequence: " 0 R"
(3) Find the location and value of the integer preceding this,
and map it to the new value.
(4) Rewrite the object with new object numbers.
Definition at line 2450 of file pdfio2.c.
Referenced by ptraConcatenatePdfToData().