Leptonica 1.82.0
Image processing and image analysis suite
psio1.c
Go to the documentation of this file.
1/*====================================================================*
2 - Copyright (C) 2001 Leptonica. All rights reserved.
3 -
4 - Redistribution and use in source and binary forms, with or without
5 - modification, are permitted provided that the following conditions
6 - are met:
7 - 1. Redistributions of source code must retain the above copyright
8 - notice, this list of conditions and the following disclaimer.
9 - 2. Redistributions in binary form must reproduce the above
10 - copyright notice, this list of conditions and the following
11 - disclaimer in the documentation and/or other materials
12 - provided with the distribution.
13 -
14 - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
15 - ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
16 - LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
17 - A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
18 - CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
19 - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
20 - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
21 - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
22 - OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
23 - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
24 - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
25 *====================================================================*/
26
110#ifdef HAVE_CONFIG_H
111#include <config_auto.h>
112#endif /* HAVE_CONFIG_H */
113
114#include <string.h>
115#include "allheaders.h"
116
117/* --------------------------------------------*/
118#if USE_PSIO /* defined in environ.h */
119 /* --------------------------------------------*/
120
121/*-------------------------------------------------------------*
122 * Convert files in a directory to PS *
123 *-------------------------------------------------------------*/
124/*
125 * \brief convertFilesToPS()
126 *
127 * \param[in] dirin input directory
128 * \param[in] substr [optional] substring filter on filenames; can be NULL
129 * \param[in] res typ. 300 or 600 ppi
130 * \param[in] fileout output ps file
131 * \return 0 if OK, 1 on error
132 *
133 * <pre>
134 * Notes:
135 * (1) This generates a PS file for all image files in a specified
136 * directory that contain the substr pattern to be matched.
137 * (2) Each image is written to a separate page in the output PS file.
138 * (3) All images are written compressed:
139 * * if tiffg4 --> use ccittg4
140 * * if jpeg --> use dct
141 * * all others --> use flate
142 * If the image is jpeg or tiffg4, we use the existing compressed
143 * strings for the encoding; otherwise, we read the image into
144 * a pix and flate-encode the pieces.
145 * (4) The resolution is often confusing. It is interpreted
146 * as the resolution of the output display device: "If the
147 * input image were digitized at 300 ppi, what would it
148 * look like when displayed at res ppi." So, for example,
149 * if res = 100 ppi, then the display pixels are 3x larger
150 * than the 300 ppi pixels, and the image will be rendered
151 * 3x larger.
152 * (5) The size of the PostScript file is independent of the resolution,
153 * because the entire file is encoded. The res parameter just
154 * tells the PS decomposer how to render the page. Therefore,
155 * for minimum file size without loss of visual information,
156 * if the output res is less than 300, you should downscale
157 * the image to the output resolution before wrapping in PS.
158 * (6) The "canvas" on which the image is rendered, at the given
159 * output resolution, is a standard page size (8.5 x 11 in).
160 * </pre>
161 */
162l_ok
163convertFilesToPS(const char *dirin,
164 const char *substr,
165 l_int32 res,
166 const char *fileout)
167{
168SARRAY *sa;
169
170 PROCNAME("convertFilesToPS");
171
172 if (!dirin)
173 return ERROR_INT("dirin not defined", procName, 1);
174 if (!fileout)
175 return ERROR_INT("fileout not defined", procName, 1);
176 if (res <= 0) {
177 L_INFO("setting res to 300 ppi\n", procName);
178 res = 300;
179 }
180 if (res < 10 || res > 4000)
181 L_WARNING("res is typically in the range 300-600 ppi\n", procName);
182
183 /* Get all filtered and sorted full pathnames. */
184 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
185
186 /* Generate the PS file. Don't use bounding boxes. */
187 l_psWriteBoundingBox(FALSE);
188 sarrayConvertFilesToPS(sa, res, fileout);
189 l_psWriteBoundingBox(TRUE);
190 sarrayDestroy(&sa);
191 return 0;
192}
193
194
195/*
196
197 * \brief sarrayConvertFilesToPS()
198 *
199 * \param[in] sarray of full path names
200 * \param[in] res typ. 300 or 600 ppi
201 * \param[in] fileout output ps file
202 * \return 0 if OK, 1 on error
203 *
204 * <pre>
205 * Notes:
206 * (1) See convertFilesToPS()
207 * </pre>
208 */
209l_ok
210sarrayConvertFilesToPS(SARRAY *sa,
211 l_int32 res,
212 const char *fileout)
213{
214char *fname;
215l_int32 i, nfiles, index, ret, format;
216
217 PROCNAME("sarrayConvertFilesToPS");
218
219 if (!sa)
220 return ERROR_INT("sa not defined", procName, 1);
221 if (!fileout)
222 return ERROR_INT("fileout not defined", procName, 1);
223 if (res <= 0) {
224 L_INFO("setting res to 300 ppi\n", procName);
225 res = 300;
226 }
227 if (res < 10 || res > 4000)
228 L_WARNING("res is typically in the range 300-600 ppi\n", procName);
229
230 nfiles = sarrayGetCount(sa);
231 for (i = 0, index = 0; i < nfiles; i++) {
232 fname = sarrayGetString(sa, i, L_NOCOPY);
233 ret = pixReadHeader(fname, &format, NULL, NULL, NULL, NULL, NULL);
234 if (ret) continue;
235 if (format == IFF_UNKNOWN)
236 continue;
237
238 writeImageCompressedToPSFile(fname, fileout, res, &index);
239 }
240
241 return 0;
242}
243
244
245/*
246 * \brief convertFilesFittedToPS()
247 *
248 * \param[in] dirin input directory
249 * \param[in] substr [optional] substring filter on filenames; can be NULL
250 * \param[in] xpts desired size in printer points; use 0 for default
251 * \param[in] ypts desired size in printer points; use 0 for default
252 * \param[in] fileout output ps file
253 * \return 0 if OK, 1 on error
254 *
255 * <pre>
256 * Notes:
257 * (1) This generates a PS file for all files in a specified directory
258 * that contain the substr pattern to be matched.
259 * (2) Each image is written to a separate page in the output PS file.
260 * (3) All images are written compressed:
261 * * if tiffg4 --> use ccittg4
262 * * if jpeg --> use dct
263 * * all others --> use flate
264 * If the image is jpeg or tiffg4, we use the existing compressed
265 * strings for the encoding; otherwise, we read the image into
266 * a pix and flate-encode the pieces.
267 * (4) The resolution is internally determined such that the images
268 * are rendered, in at least one direction, at 100% of the given
269 * size in printer points. Use 0.0 for xpts or ypts to get
270 * the default value, which is 612.0 or 792.0, rsp.
271 * (5) The size of the PostScript file is independent of the resolution,
272 * because the entire file is encoded. The %xpts and %ypts
273 * parameter tells the PS decomposer how to render the page.
274 * </pre>
275 */
276l_ok
277convertFilesFittedToPS(const char *dirin,
278 const char *substr,
279 l_float32 xpts,
280 l_float32 ypts,
281 const char *fileout)
282{
283SARRAY *sa;
284
285 PROCNAME("convertFilesFittedToPS");
286
287 if (!dirin)
288 return ERROR_INT("dirin not defined", procName, 1);
289 if (!fileout)
290 return ERROR_INT("fileout not defined", procName, 1);
291 if (xpts <= 0.0) {
292 L_INFO("setting xpts to 612.0 ppi\n", procName);
293 xpts = 612.0;
294 }
295 if (ypts <= 0.0) {
296 L_INFO("setting ypts to 792.0 ppi\n", procName);
297 ypts = 792.0;
298 }
299 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
300 L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
301
302 /* Get all filtered and sorted full pathnames. */
303 sa = getSortedPathnamesInDirectory(dirin, substr, 0, 0);
304
305 /* Generate the PS file. Don't use bounding boxes. */
306 l_psWriteBoundingBox(FALSE);
307 sarrayConvertFilesFittedToPS(sa, xpts, ypts, fileout);
308 l_psWriteBoundingBox(TRUE);
309 sarrayDestroy(&sa);
310 return 0;
311}
312
313
314/*
315 * \brief sarrayConvertFilesFittedToPS()
316 *
317 * \param[in] sarray of full path names
318 * \param[in] xpts desired size in printer points; use 0 for default
319 * \param[in] ypts desired size in printer points; use 0 for default
320 * \param[in] fileout output ps file
321 * \return 0 if OK, 1 on error
322 *
323 * <pre>
324 * Notes:
325 * (1) See convertFilesFittedToPS()
326 * </pre>
327 */
328l_ok
329sarrayConvertFilesFittedToPS(SARRAY *sa,
330 l_float32 xpts,
331 l_float32 ypts,
332 const char *fileout)
333{
334char *fname;
335l_int32 ret, i, w, h, nfiles, index, format, res;
336
337 PROCNAME("sarrayConvertFilesFittedToPS");
338
339 if (!sa)
340 return ERROR_INT("sa not defined", procName, 1);
341 if (!fileout)
342 return ERROR_INT("fileout not defined", procName, 1);
343 if (xpts <= 0.0) {
344 L_INFO("setting xpts to 612.0\n", procName);
345 xpts = 612.0;
346 }
347 if (ypts <= 0.0) {
348 L_INFO("setting ypts to 792.0\n", procName);
349 ypts = 792.0;
350 }
351 if (xpts < 100.0 || xpts > 2000.0 || ypts < 100.0 || ypts > 2000.0)
352 L_WARNING("xpts,ypts are typically in the range 500-800\n", procName);
353
354 nfiles = sarrayGetCount(sa);
355 for (i = 0, index = 0; i < nfiles; i++) {
356 fname = sarrayGetString(sa, i, L_NOCOPY);
357 ret = pixReadHeader(fname, &format, &w, &h, NULL, NULL, NULL);
358 if (ret) continue;
359 if (format == IFF_UNKNOWN)
360 continue;
361
362 /* Be sure the entire image is wrapped */
363 if (xpts * h < ypts * w)
364 res = (l_int32)((l_float32)w * 72.0 / xpts);
365 else
366 res = (l_int32)((l_float32)h * 72.0 / ypts);
367
368 writeImageCompressedToPSFile(fname, fileout, res, &index);
369 }
370
371 return 0;
372}
373
374
375/*
376 * \brief writeImageCompressedToPSFile()
377 *
378 * \param[in] filein input image file
379 * \param[in] fileout output ps file
380 * \param[in] res output printer resolution
381 * \param[in,out] pindex index of image in output ps file
382 * \return 0 if OK, 1 on error
383 *
384 * <pre>
385 * Notes:
386 * (1) This wraps a single page image in PS.
387 * (2) The input file can be in any format. It is compressed as follows:
388 * * if in tiffg4 --> use ccittg4
389 * * if in jpeg --> use dct
390 * * all others --> use flate
391 * (3) Before the first call, set %index = 0. %index is incremented
392 * if the page is successfully written. It is used to decide
393 * whether to write (index == 0) or append (index > 0) to the file.
394 * </pre>
395 */
396l_ok
397writeImageCompressedToPSFile(const char *filein,
398 const char *fileout,
399 l_int32 res,
400 l_int32 *pindex)
401{
402const char *op;
403l_int32 format, retval;
404
405 PROCNAME("writeImageCompressedToPSFile");
406
407 if (!pindex)
408 return ERROR_INT("&index not defined", procName, 1);
409
410 findFileFormat(filein, &format);
411 if (format == IFF_UNKNOWN) {
412 L_ERROR("format of %s not known\n", procName, filein);
413 return 1;
414 }
415
416 op = (*pindex == 0) ? "w" : "a";
417 if (format == IFF_JFIF_JPEG) {
418 retval = convertJpegToPS(filein, fileout, op, 0, 0,
419 res, 1.0, *pindex + 1, TRUE);
420 } else if (format == IFF_TIFF_G4) {
421 retval = convertG4ToPS(filein, fileout, op, 0, 0,
422 res, 1.0, *pindex + 1, FALSE, TRUE);
423 } else { /* all other image formats */
424 retval = convertFlateToPS(filein, fileout, op, 0, 0,
425 res, 1.0, *pindex + 1, TRUE);
426 }
427 if (retval == 0) (*pindex)++;
428
429 return retval;
430}
431
432
433/*-------------------------------------------------------------*
434 * Convert mixed text/image files to PS *
435 *-------------------------------------------------------------*/
436/*
437 * \brief convertSegmentedPagesToPS()
438 *
439 * \param[in] pagedir input page image directory
440 * \param[in] pagestr [optional] substring filter on page filenames;
441 * can be NULL
442 * \param[in] page_numpre number of characters in page name before number
443 * \param[in] maskdir input mask image directory
444 * \param[in] maskstr [optional] substring filter on mask filenames;
445 * can be NULL
446 * \param[in] mask_numpre number of characters in mask name before number
447 * \param[in] numpost number of characters in names after number
448 * \param[in] maxnum only consider page numbers up to this value
449 * \param[in] textscale scale of text output relative to pixs
450 * \param[in] imagescale scale of image output relative to pixs
451 * \param[in] threshold for binarization; typ. about 190; 0 for default
452 * \param[in] fileout output ps file
453 * \return 0 if OK, 1 on error
454 *
455 * <pre>
456 * Notes:
457 * (1) This generates a PS file for all page image and mask files in two
458 * specified directories and that contain the page numbers as
459 * specified below. The two directories can be the same, in which
460 * case the page and mask files are differentiated by the two
461 * substrings for string matches.
462 * (2) The page images are taken in lexicographic order.
463 * Mask images whose numbers match the page images are used to
464 * segment the page images. Page images without a matching
465 * mask image are scaled, thresholded and rendered entirely as text.
466 * (3) Each PS page is generated as a compressed representation of
467 * the page image, where the part of the image under the mask
468 * is suitably scaled and compressed as DCT (i.e., jpeg), and
469 * the remaining part of the page is suitably scaled, thresholded,
470 * compressed as G4 (i.e., tiff g4), and rendered by painting
471 * black through the resulting text mask.
472 * (4) The scaling is typically 2x down for the DCT component
473 * (%imagescale = 0.5) and 2x up for the G4 component
474 * (%textscale = 2.0).
475 * (5) The resolution is automatically set to fit to a
476 * letter-size (8.5 x 11 inch) page.
477 * (6) Both the DCT and the G4 encoding are PostScript level 2.
478 * (7) It is assumed that the page number is contained within
479 * the basename (the filename without directory or extension).
480 * %page_numpre is the number of characters in the page basename
481 * preceding the actual page number; %mask_numpre is likewise for
482 * the mask basename; %numpost is the number of characters
483 * following the page number. For example, for mask name
484 * mask_006.tif, mask_numpre = 5 ("mask_").
485 * (8) To render a page as is -- that is, with no thresholding
486 * of any pixels -- use a mask in the mask directory that is
487 * full size with all pixels set to 1. If the page is 1 bpp,
488 * it is not necessary to have a mask.
489 * </pre>
490 */
491l_ok
492convertSegmentedPagesToPS(const char *pagedir,
493 const char *pagestr,
494 l_int32 page_numpre,
495 const char *maskdir,
496 const char *maskstr,
497 l_int32 mask_numpre,
498 l_int32 numpost,
499 l_int32 maxnum,
500 l_float32 textscale,
501 l_float32 imagescale,
502 l_int32 threshold,
503 const char *fileout)
504{
505l_int32 pageno, i, npages;
506PIX *pixs, *pixm;
507SARRAY *sapage, *samask;
508
509 PROCNAME("convertSegmentedPagesToPS");
510
511 if (!pagedir)
512 return ERROR_INT("pagedir not defined", procName, 1);
513 if (!maskdir)
514 return ERROR_INT("maskdir not defined", procName, 1);
515 if (!fileout)
516 return ERROR_INT("fileout not defined", procName, 1);
517 if (threshold <= 0) {
518 L_INFO("setting threshold to 190\n", procName);
519 threshold = 190;
520 }
521
522 /* Get numbered full pathnames; max size of sarray is maxnum */
523 sapage = getNumberedPathnamesInDirectory(pagedir, pagestr,
524 page_numpre, numpost, maxnum);
525 samask = getNumberedPathnamesInDirectory(maskdir, maskstr,
526 mask_numpre, numpost, maxnum);
527 sarrayPadToSameSize(sapage, samask, "");
528 if ((npages = sarrayGetCount(sapage)) == 0) {
529 sarrayDestroy(&sapage);
530 sarrayDestroy(&samask);
531 return ERROR_INT("no matching pages found", procName, 1);
532 }
533
534 /* Generate the PS file */
535 pageno = 1;
536 for (i = 0; i < npages; i++) {
537 if ((pixs = pixReadIndexed(sapage, i)) == NULL)
538 continue;
539 pixm = pixReadIndexed(samask, i);
540 pixWriteSegmentedPageToPS(pixs, pixm, textscale, imagescale,
541 threshold, pageno, fileout);
542 pixDestroy(&pixs);
543 pixDestroy(&pixm);
544 pageno++;
545 }
546
547 sarrayDestroy(&sapage);
548 sarrayDestroy(&samask);
549 return 0;
550}
551
552
553/*
554 * \brief pixWriteSegmentedPageToPS()
555 *
556 * \param[in] pixs all depths; colormap ok
557 * \param[in] pixm [optional] 1 bpp segmentation mask over image region
558 * \param[in] textscale scale of text output relative to pixs
559 * \param[in] imagescale scale of image output relative to pixs
560 * \param[in] threshold for binarization; typ. about 190; 0 for default
561 * \param[in] pageno page number in set; use 1 for new output file
562 * \param[in] fileout output ps file
563 * \return 0 if OK, 1 on error
564 *
565 * <pre>
566 * Notes:
567 * (1) This generates the PS string for a mixed text/image page,
568 * and adds it to an existing file if %pageno > 1.
569 * The PS output is determined by fitting the result to
570 * a letter-size (8.5 x 11 inch) page.
571 * (2) The two images (pixs and pixm) are at the same resolution
572 * (typically 300 ppi). They are used to generate two compressed
573 * images, pixb and pixc, that are put directly into the output
574 * PS file.
575 * (3) pixb is the text component. In the PostScript world, we think of
576 * it as a mask through which we paint black. It is produced by
577 * scaling pixs by %textscale, and thresholding to 1 bpp.
578 * (4) pixc is the image component, which is that part of pixs under
579 * the mask pixm. It is scaled from pixs by %imagescale.
580 * (5) Typical values are textscale = 2.0 and imagescale = 0.5.
581 * (6) If pixm == NULL, the page has only text. If it is all black,
582 * the page is all image and has no text.
583 * (7) This can be used to write a multi-page PS file, by using
584 * sequential page numbers with the same output file. It can
585 * also be used to write separate PS files for each page,
586 * by using different output files with %pageno = 0 or 1.
587 * </pre>
588 */
589l_ok
590pixWriteSegmentedPageToPS(PIX *pixs,
591 PIX *pixm,
592 l_float32 textscale,
593 l_float32 imagescale,
594 l_int32 threshold,
595 l_int32 pageno,
596 const char *fileout)
597{
598l_int32 alltext, notext, d, ret;
599l_uint32 val;
600l_float32 scaleratio;
601PIX *pixmi, *pixmis, *pixt, *pixg, *pixsc, *pixb, *pixc;
602
603 PROCNAME("pixWriteSegmentedPageToPS");
604
605 if (!pixs)
606 return ERROR_INT("pixs not defined", procName, 1);
607 if (!fileout)
608 return ERROR_INT("fileout not defined", procName, 1);
609 if (imagescale <= 0.0 || textscale <= 0.0)
610 return ERROR_INT("relative scales must be > 0.0", procName, 1);
611
612 /* Analyze the page. Determine the ratio by which the
613 * binary text mask is scaled relative to the image part.
614 * If there is no image region (alltext == TRUE), the
615 * text mask will be rendered directly to fit the page,
616 * and scaleratio = 1.0. */
617 alltext = TRUE;
618 notext = FALSE;
619 scaleratio = 1.0;
620 if (pixm) {
621 pixZero(pixm, &alltext); /* pixm empty: all text */
622 if (alltext) {
623 pixm = NULL; /* treat it as not existing here */
624 } else {
625 pixmi = pixInvert(NULL, pixm);
626 pixZero(pixmi, &notext); /* pixm full; no text */
627 pixDestroy(&pixmi);
628 scaleratio = textscale / imagescale;
629 }
630 }
631
632 if (pixGetDepth(pixs) == 1) { /* render tiff g4 */
633 pixb = pixClone(pixs);
634 pixc = NULL;
635 } else {
636 pixt = pixConvertTo8Or32(pixs, L_CLONE, 0); /* clone if possible */
637
638 /* Get the binary text mask. Note that pixg cannot be a
639 * clone of pixs, because it may be altered by pixSetMasked(). */
640 pixb = NULL;
641 if (notext == FALSE) {
642 d = pixGetDepth(pixt);
643 if (d == 8)
644 pixg = pixCopy(NULL, pixt);
645 else /* d == 32 */
646 pixg = pixConvertRGBToLuminance(pixt);
647 if (pixm) /* clear out the image parts */
648 pixSetMasked(pixg, pixm, 255);
649 if (textscale == 1.0)
650 pixsc = pixClone(pixg);
651 else if (textscale >= 0.7)
652 pixsc = pixScaleGrayLI(pixg, textscale, textscale);
653 else
654 pixsc = pixScaleAreaMap(pixg, textscale, textscale);
655 pixb = pixThresholdToBinary(pixsc, threshold);
656 pixDestroy(&pixg);
657 pixDestroy(&pixsc);
658 }
659
660 /* Get the scaled image region */
661 pixc = NULL;
662 if (pixm) {
663 if (imagescale == 1.0)
664 pixsc = pixClone(pixt); /* can possibly be a clone of pixs */
665 else
666 pixsc = pixScale(pixt, imagescale, imagescale);
667
668 /* If pixm is not full, clear the pixels in pixsc
669 * corresponding to bg in pixm, where there can be text
670 * that is written through the mask pixb. Note that
671 * we could skip this and use pixsc directly in
672 * pixWriteMixedToPS(); however, clearing these
673 * non-image regions to a white background will reduce
674 * the size of pixc (relative to pixsc), and hence
675 * reduce the size of the PS file that is generated.
676 * Use a copy so that we don't accidentally alter pixs. */
677 if (notext == FALSE) {
678 pixmis = pixScale(pixm, imagescale, imagescale);
679 pixmi = pixInvert(NULL, pixmis);
680 val = (d == 8) ? 0xff : 0xffffff00;
681 pixc = pixCopy(NULL, pixsc);
682 pixSetMasked(pixc, pixmi, val); /* clear non-image part */
683 pixDestroy(&pixmis);
684 pixDestroy(&pixmi);
685 } else {
686 pixc = pixClone(pixsc);
687 }
688 pixDestroy(&pixsc);
689 }
690 pixDestroy(&pixt);
691 }
692
693 /* Generate the PS file. Don't use bounding boxes. */
694 l_psWriteBoundingBox(FALSE);
695 ret = pixWriteMixedToPS(pixb, pixc, scaleratio, pageno, fileout);
696 l_psWriteBoundingBox(TRUE);
697 pixDestroy(&pixb);
698 pixDestroy(&pixc);
699 return ret;
700}
701
702
703/*
704 * \brief pixWriteMixedToPS()
705 *
706 * \param[in] pixb [optional] 1 bpp mask; typically for text
707 * \param[in] pixc [optional] 8 or 32 bpp image regions
708 * \param[in] scale scale factor for rendering pixb, relative to pixc;
709 * typ. 4.0
710 * \param[in] pageno page number in set; use 1 for new output file
711 * \param[in] fileout output ps file
712 * \return 0 if OK, 1 on error
713 *
714 * <pre>
715 * Notes:
716 * (1) This low level function generates the PS string for a mixed
717 * text/image page, and adds it to an existing file if
718 * %pageno > 1.
719 * (2) The two images (pixb and pixc) are typically generated at the
720 * resolution that they will be rendered in the PS file.
721 * (3) pixb is the text component. In the PostScript world, we think of
722 * it as a mask through which we paint black.
723 * (4) pixc is the (typically halftone) image component. It is
724 * white in the rest of the page. To minimize the size of the
725 * PS file, it should be rendered at a resolution that is at
726 * least equal to its actual resolution.
727 * (5) %scale gives the ratio of resolution of pixb to pixc.
728 * Typical resolutions are: 600 ppi for pixb, 150 ppi for pixc;
729 * so %scale = 4.0. If one of the images is not defined,
730 * the value of %scale is ignored.
731 * (6) We write pixc with DCT compression (jpeg). This is followed
732 * by painting the text as black through the mask pixb. If
733 * pixc doesn't exist (alltext), we write the text with the
734 * PS "image" operator instead of the "imagemask" operator,
735 * because ghostscript's ps2pdf is flaky when the latter is used.
736 * (7) The actual output resolution is determined by fitting the
737 * result to a letter-size (8.5 x 11 inch) page.
738 * </pre>
739 */
740l_ok
741pixWriteMixedToPS(PIX *pixb,
742 PIX *pixc,
743 l_float32 scale,
744 l_int32 pageno,
745 const char *fileout)
746{
747char *tname;
748const char *op;
749l_int32 resb, resc, endpage, maskop, ret;
750
751 PROCNAME("pixWriteMixedToPS");
752
753 if (!pixb && !pixc)
754 return ERROR_INT("pixb and pixc both undefined", procName, 1);
755 if (!fileout)
756 return ERROR_INT("fileout not defined", procName, 1);
757
758 /* Compute the resolution that fills a letter-size page. */
759 if (!pixc) {
760 resb = getResLetterPage(pixGetWidth(pixb), pixGetHeight(pixb), 0);
761 } else {
762 resc = getResLetterPage(pixGetWidth(pixc), pixGetHeight(pixc), 0);
763 if (pixb)
764 resb = (l_int32)(scale * resc);
765 }
766
767 /* Write the jpeg image first */
768 if (pixc) {
769 tname = l_makeTempFilename();
770 pixWrite(tname, pixc, IFF_JFIF_JPEG);
771 endpage = (pixb) ? FALSE : TRUE;
772 op = (pageno <= 1) ? "w" : "a";
773 ret = convertJpegToPS(tname, fileout, op, 0, 0, resc, 1.0,
774 pageno, endpage);
775 lept_rmfile(tname);
776 LEPT_FREE(tname);
777 if (ret)
778 return ERROR_INT("jpeg data not written", procName, 1);
779 }
780
781 /* Write the binary data, either directly or, if there is
782 * a jpeg image on the page, through the mask. */
783 if (pixb) {
784 tname = l_makeTempFilename();
785 pixWrite(tname, pixb, IFF_TIFF_G4);
786 op = (pageno <= 1 && !pixc) ? "w" : "a";
787 maskop = (pixc) ? 1 : 0;
788 ret = convertG4ToPS(tname, fileout, op, 0, 0, resb, 1.0,
789 pageno, maskop, 1);
790 lept_rmfile(tname);
791 LEPT_FREE(tname);
792 if (ret)
793 return ERROR_INT("tiff data not written", procName, 1);
794 }
795
796 return 0;
797}
798
799
800/*-------------------------------------------------------------*
801 * Convert any image file to PS for embedding *
802 *-------------------------------------------------------------*/
803/*
804 * \brief convertToPSEmbed()
805 *
806 * \param[in] filein input image file, any format
807 * \param[in] fileout output ps file
808 * \param[in] level PostScript compression: 1 (uncompressed), 2 or 3
809 * \return 0 if OK, 1 on error
810 *
811 * <pre>
812 * Notes:
813 * (1) This is a wrapper function that generates a PS file with
814 * a bounding box, from any input image file.
815 * (2) Do the best job of compression given the specified level.
816 * %level=3 does flate compression on anything that is not
817 * tiffg4 (1 bpp) or jpeg (8 bpp or rgb).
818 * (3) If %level=2 and the file is not tiffg4 or jpeg, it will
819 * first be written to file as jpeg with quality = 75.
820 * This will remove the colormap and cause some degradation
821 * in the image.
822 * (4) The bounding box is required when a program such as TeX
823 * (through epsf) places and rescales the image. It is
824 * sized for fitting the image to an 8.5 x 11.0 inch page.
825 * </pre>
826 */
827l_ok
828convertToPSEmbed(const char *filein,
829 const char *fileout,
830 l_int32 level)
831{
832char *tname;
833l_int32 d, format;
834PIX *pix, *pixs;
835
836 PROCNAME("convertToPSEmbed");
837
838 if (!filein)
839 return ERROR_INT("filein not defined", procName, 1);
840 if (!fileout)
841 return ERROR_INT("fileout not defined", procName, 1);
842 if (level != 1 && level != 2 && level != 3) {
843 L_ERROR("invalid level specified; using level 2\n", procName);
844 level = 2;
845 }
846
847 if (level == 1) { /* no compression */
848 pixWritePSEmbed(filein, fileout);
849 return 0;
850 }
851
852 /* Find the format and write out directly if in jpeg or tiff g4 */
853 findFileFormat(filein, &format);
854 if (format == IFF_JFIF_JPEG) {
855 convertJpegToPSEmbed(filein, fileout);
856 return 0;
857 } else if (format == IFF_TIFF_G4) {
858 convertG4ToPSEmbed(filein, fileout);
859 return 0;
860 } else if (format == IFF_UNKNOWN) {
861 L_ERROR("format of %s not known\n", procName, filein);
862 return 1;
863 }
864
865 /* If level 3, flate encode. */
866 if (level == 3) {
867 convertFlateToPSEmbed(filein, fileout);
868 return 0;
869 }
870
871 /* OK, it's level 2, so we must convert to jpeg or tiff g4 */
872 if ((pixs = pixRead(filein)) == NULL)
873 return ERROR_INT("image not read from file", procName, 1);
874 d = pixGetDepth(pixs);
875 if ((d == 2 || d == 4) && !pixGetColormap(pixs))
876 pix = pixConvertTo8(pixs, 0);
877 else if (d == 16)
878 pix = pixConvert16To8(pixs, L_MS_BYTE);
879 else
881 pixDestroy(&pixs);
882 if (!pix)
883 return ERROR_INT("converted pix not made", procName, 1);
884
885 d = pixGetDepth(pix);
886 tname = l_makeTempFilename();
887 if (d == 1) {
888 if (pixWrite(tname, pix, IFF_TIFF_G4)) {
889 LEPT_FREE(tname);
890 pixDestroy(&pix);
891 return ERROR_INT("g4 tiff not written", procName, 1);
892 }
893 convertG4ToPSEmbed(tname, fileout);
894 } else {
895 if (pixWrite(tname, pix, IFF_JFIF_JPEG)) {
896 LEPT_FREE(tname);
897 pixDestroy(&pix);
898 return ERROR_INT("jpeg not written", procName, 1);
899 }
900 convertJpegToPSEmbed(tname, fileout);
901 }
902
903 lept_rmfile(tname);
904 LEPT_FREE(tname);
905 pixDestroy(&pix);
906 return 0;
907}
908
909
910/*-------------------------------------------------------------*
911 * Write all images in a pixa out to PS *
912 *-------------------------------------------------------------*/
913/*
914 * \brief pixaWriteCompressedToPS()
915 *
916 * \param[in] pixa any set of images
917 * \param[in] fileout output ps file
918 * \param[in] res resolution for the set of input images
919 * \param[in] level PostScript compression capability: 2 or 3
920 * \return 0 if OK, 1 on error
921 *
922 * <pre>
923 * Notes:
924 * (1) This generates a PostScript file of multiple page images,
925 * all with bounding boxes.
926 * (2) See pixWriteCompressedToPS() for details.
927 * (3) To generate a pdf from %fileout, use:
928 * ps2pdf <infile.ps> <outfile.pdf>
929 * </pre>
930 */
931l_ok
932pixaWriteCompressedToPS(PIXA *pixa,
933 const char *fileout,
934 l_int32 res,
935 l_int32 level)
936{
937l_int32 i, n, index, ret;
938PIX *pix;
939
940 PROCNAME("pixaWriteCompressedToPS");
941
942 if (!pixa)
943 return ERROR_INT("pixa not defined", procName, 1);
944 if (!fileout)
945 return ERROR_INT("fileout not defined", procName, 1);
946 if (level != 2 && level != 3) {
947 L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
948 level = 2;
949 }
950
951 index = 0;
952 n = pixaGetCount(pixa);
953 for (i = 0; i < n; i++) {
954 pix = pixaGetPix(pixa, i, L_CLONE);
955 ret = pixWriteCompressedToPS(pix, fileout, res, level, &index);
956 if (ret) L_ERROR("PS string not written for image %d\n", procName, i);
957 pixDestroy(&pix);
958 }
959 return 0;
960}
961
962
963/*
964 * \brief pixWriteCompressedToPS()
965 *
966 * \param[in] pix any depth; colormap OK
967 * \param[in] fileout output ps file
968 * \param[in] res of input image
969 * \param[in] level PostScript compression capability: 2 or 3
970 * \param[in,out] pindex index of image in output ps file
971 * \return 0 if OK, 1 on error
972 *
973 * <pre>
974 * Notes:
975 * (1) This generates a PostScript string for %pix, and writes it
976 * to a file, with a bounding box.
977 * (2) *pindex keeps track of the number of images that have been
978 * written to %fileout. If this is the first image to be
979 * converted, set *pindex == 0 before passing it in. If the
980 * PostScript string is successfully generated, this will increment
981 * *pindex. If *pindex > 0, the PostScript string will be
982 * appended to %fileout.
983 * (3) PostScript level 2 enables lossless tiffg4 and lossy jpeg
984 * compression. Level 3 adds lossless flate (essentially gzip)
985 * compression.
986 * * For images with a colormap, lossless flate is often better in
987 * both quality and size than jpeg.
988 * * The decision for images without a colormap affects compression
989 * efficiency: %level2 (jpeg) is usually better than %level3 (flate)
990 * * Because jpeg does not handle 16 bpp, if %level == 2, the image
991 * is converted to 8 bpp (using MSB) and compressed with jpeg,
992 * cmap + level2: jpeg
993 * cmap + level3: flate
994 * 1 bpp: tiffg4
995 * 2 or 4 bpp + level2: jpeg
996 * 2 or 4 bpp + level3: flate
997 * 8 bpp + level2: jpeg
998 * 8 bpp + level3: flate
999 * 16 bpp + level2: jpeg [converted to 8 bpp, with warning]
1000 * 16 bpp + level3: flate
1001 * 32 bpp + level2: jpeg
1002 * 32 bpp + level3: flate
1003 * </pre>
1004 */
1005l_ok
1006pixWriteCompressedToPS(PIX *pix,
1007 const char *fileout,
1008 l_int32 res,
1009 l_int32 level,
1010 l_int32 *pindex)
1011{
1012char *tname;
1013l_int32 writeout, d;
1014PIX *pixt;
1015PIXCMAP *cmap;
1016
1017 PROCNAME("pixWriteCompressedToPS");
1018
1019 if (!pix)
1020 return ERROR_INT("pix not defined", procName, 1);
1021 if (!fileout)
1022 return ERROR_INT("fileout not defined", procName, 1);
1023 if (level != 2 && level != 3) {
1024 L_ERROR("only levels 2 and 3 permitted; using level 2\n", procName);
1025 level = 2;
1026 }
1027 if (!pindex)
1028 return ERROR_INT("&index not defined", procName, 1);
1029
1030 tname = l_makeTempFilename();
1031 writeout = TRUE;
1032 d = pixGetDepth(pix);
1033 cmap = pixGetColormap(pix);
1034 if (d == 1) {
1035 if (pixWrite(tname, pix, IFF_TIFF_G4))
1036 writeout = FALSE;
1037 } else if (level == 3) {
1038 if (pixWrite(tname, pix, IFF_PNG))
1039 writeout = FALSE;
1040 } else { /* level == 2 */
1041 if (cmap) {
1042 pixt = pixConvertForPSWrap(pix);
1043 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1044 writeout = FALSE;
1045 pixDestroy(&pixt);
1046 } else if (d == 16) {
1047 L_WARNING("d = 16; converting to 8 bpp for jpeg\n", procName);
1048 pixt = pixConvert16To8(pix, L_MS_BYTE);
1049 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1050 writeout = FALSE;
1051 pixDestroy(&pixt);
1052 } else if (d == 2 || d == 4) {
1053 pixt = pixConvertTo8(pix, 0);
1054 if (pixWrite(tname, pixt, IFF_JFIF_JPEG))
1055 writeout = FALSE;
1056 pixDestroy(&pixt);
1057 } else if (d == 8 || d == 32) {
1058 if (pixWrite(tname, pix, IFF_JFIF_JPEG))
1059 writeout = FALSE;
1060 } else { /* shouldn't happen */
1061 L_ERROR("invalid depth with level 2: %d\n", procName, d);
1062 writeout = FALSE;
1063 }
1064 }
1065
1066 if (writeout)
1067 writeImageCompressedToPSFile(tname, fileout, res, pindex);
1068
1069 if (lept_rmfile(tname) != 0)
1070 L_ERROR("temp file %s was not deleted\n", procName, tname);
1071 LEPT_FREE(tname);
1072 return (writeout) ? 0 : 1;
1073}
1074
1075/* --------------------------------------------*/
1076#endif /* USE_PSIO */
1077/* --------------------------------------------*/
PIX * pixThresholdToBinary(PIX *pixs, l_int32 thresh)
pixThresholdToBinary()
Definition: grayquant.c:447
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:593
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:705
l_ok pixZero(PIX *pix, l_int32 *pempty)
pixZero()
Definition: pix3.c:1815
l_ok pixSetMasked(PIX *pixd, PIX *pixm, l_uint32 val)
pixSetMasked()
Definition: pix3.c:163
PIX * pixInvert(PIX *pixd, PIX *pixs)
pixInvert()
Definition: pix3.c:1509
@ REMOVE_CMAP_BASED_ON_SRC
Definition: pix.h:260
@ L_CLONE
Definition: pix.h:713
@ L_NOCOPY
Definition: pix.h:710
@ L_MS_BYTE
Definition: pix.h:849
l_int32 pixaGetCount(PIXA *pixa)
pixaGetCount()
Definition: pixabasic.c:650
PIX * pixaGetPix(PIXA *pixa, l_int32 index, l_int32 accesstype)
pixaGetPix()
Definition: pixabasic.c:691
PIX * pixConvertRGBToLuminance(PIX *pixs)
pixConvertRGBToLuminance()
Definition: pixconv.c:742
PIX * pixConvertTo8(PIX *pixs, l_int32 cmapflag)
pixConvertTo8()
Definition: pixconv.c:3133
PIX * pixConvertTo8Or32(PIX *pixs, l_int32 copyflag, l_int32 warnflag)
pixConvertTo8Or32()
Definition: pixconv.c:3492
PIX * pixRemoveColormap(PIX *pixs, l_int32 type)
pixRemoveColormap()
Definition: pixconv.c:328
PIX * pixConvert16To8(PIX *pixs, l_int32 type)
pixConvert16To8()
Definition: pixconv.c:1762
PIX * pixConvertForPSWrap(PIX *pixs)
pixConvertForPSWrap()
Definition: pixconv.c:3931
l_ok convertFlateToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertFlateToPS()
Definition: psio2.c:1667
l_ok convertJpegToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 endpage)
convertJpegToPS()
Definition: psio2.c:794
l_ok pixWritePSEmbed(const char *filein, const char *fileout)
pixWritePSEmbed()
Definition: psio2.c:188
l_ok convertFlateToPSEmbed(const char *filein, const char *fileout)
convertFlateToPSEmbed()
Definition: psio2.c:1553
l_ok convertG4ToPSEmbed(const char *filein, const char *fileout)
convertG4ToPSEmbed()
Definition: psio2.c:1076
l_ok convertJpegToPSEmbed(const char *filein, const char *fileout)
convertJpegToPSEmbed()
Definition: psio2.c:678
l_ok convertG4ToPS(const char *filein, const char *fileout, const char *operation, l_int32 x, l_int32 y, l_int32 res, l_float32 scale, l_int32 pageno, l_int32 maskflag, l_int32 endpage)
convertG4ToPS()
Definition: psio2.c:1182
l_int32 getResLetterPage(l_int32 w, l_int32 h, l_float32 fillfract)
getResLetterPage()
Definition: psio2.c:1992
PIX * pixReadIndexed(SARRAY *sa, l_int32 index)
pixReadIndexed()
Definition: readfile.c:281
PIX * pixRead(const char *filename)
pixRead()
Definition: readfile.c:193
l_ok pixReadHeader(const char *filename, l_int32 *pformat, l_int32 *pw, l_int32 *ph, l_int32 *pbps, l_int32 *pspp, l_int32 *piscmap)
pixReadHeader()
Definition: readfile.c:446
l_ok findFileFormat(const char *filename, l_int32 *pformat)
findFileFormat()
Definition: readfile.c:584
SARRAY * getSortedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 first, l_int32 nfiles)
getSortedPathnamesInDirectory()
Definition: sarray1.c:1848
l_ok sarrayPadToSameSize(SARRAY *sa1, SARRAY *sa2, const char *padstring)
sarrayPadToSameSize()
Definition: sarray1.c:1064
char * sarrayGetString(SARRAY *sa, l_int32 index, l_int32 copyflag)
sarrayGetString()
Definition: sarray1.c:703
l_int32 sarrayGetCount(SARRAY *sa)
sarrayGetCount()
Definition: sarray1.c:643
void sarrayDestroy(SARRAY **psa)
sarrayDestroy()
Definition: sarray1.c:362
SARRAY * getNumberedPathnamesInDirectory(const char *dirname, const char *substr, l_int32 numpre, l_int32 numpost, l_int32 maxnum)
getNumberedPathnamesInDirectory()
Definition: sarray1.c:1800
PIX * pixScale(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScale()
Definition: scale1.c:250
PIX * pixScaleGrayLI(PIX *pixs, l_float32 scalex, l_float32 scaley)
pixScaleGrayLI()
Definition: scale1.c:780
PIX * pixScaleAreaMap(PIX *pix, l_float32 scalex, l_float32 scaley)
pixScaleAreaMap()
Definition: scale1.c:1914
Pix
Definition: pix.h:139
Pixa
Definition: pix.h:456
Sarray
Definition: array.h:127
l_int32 lept_rmfile(const char *filepath)
lept_rmfile()
Definition: utils2.c:2517
char * l_makeTempFilename(void)
l_makeTempFilename()
Definition: utils2.c:3397