Leptonica 1.82.0
Image processing and image analysis suite
finditalic.c
/*====================================================================*
 -  Copyright (C) 2001 Leptonica. All rights reserved.
 -
 -  Redistribution and use in source and binary forms, with or without
 -  modification, are permitted provided that the following conditions
 -  are met:
 -  1. Redistributions of source code must retain the above copyright
 -     notice, this list of conditions and the following disclaimer.
 -  2. Redistributions in binary form must reproduce the above
 -     copyright notice, this list of conditions and the following
 -     disclaimer in the documentation and/or other materials
 -     provided with the distribution.
 -
 -  THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
 -  ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
 -  LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
 -  A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL ANY
 -  CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
 -  EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
 -  PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
 -  PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
 -  OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
 -  NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
 -  SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 *====================================================================*/
26
/*
 * \file finditalic.c
 * <pre>
 *
 *      l_ok      pixItalicWords()
 *
 *    Locate italic words.  This is an example of the use of
 *    hit-miss binary morphology with binary reconstruction
 *    (filling from a seed into a mask).
 *
 *    To see how this works, run with prog/italic.png.
 * </pre>
 */
40
41#ifdef HAVE_CONFIG_H
42#include <config_auto.h>
43#endif /* HAVE_CONFIG_H */
44
45#include "allheaders.h"
46
/* --------------------------------------------------------------- *
 * These hit-miss sels match the slanted edge of italic characters *
 * --------------------------------------------------------------- */
/* 13 rows x 6 columns, given in row-major order with no separators;
 * the literal must therefore contain exactly 13 * 6 = 78 characters to
 * agree with the (h, w) = (13, 6) passed to selCreateFromString().
 * Per the selCreateFromString() text format: 'x' is a hit, 'o' is a
 * miss, ' ' is a don't-care and 'C' marks the origin as a don't-care.
 * The miss/hit pair steps one column to the left every four rows,
 * which is what makes the pattern follow a slanted edge. */
static const char *str_ital1 = "   o x"
                               "      "
                               "      "
                               "      "
                               "  o x "
                               "      "
                               "  C   "
                               "      "
                               " o x  "
                               "      "
                               "      "
                               "      "
                               "o x   ";
63
/* Shorter variant of the slanted-edge hit-miss pattern: 10 rows x 6
 * columns in row-major order, so the literal must contain exactly
 * 10 * 6 = 60 characters to agree with the (h, w) = (10, 6) passed to
 * selCreateFromString().  Same character code as the taller pattern:
 * 'x' hit, 'o' miss, ' ' don't-care, 'C' origin (don't-care).
 * Here the miss/hit pair steps one column left every three rows.
 * Note: in this file the sel built from this string is created and
 * destroyed by pixItalicWords() but is not applied to the image. */
static const char *str_ital2 = "   o x"
                               "      "
                               "      "
                               "  o x "
                               "  C   "
                               "      "
                               " o x  "
                               "      "
                               "      "
                               "o x   ";
74
/* ------------------------------------------------------------- *
 * This sel removes noise that is not oriented as a slanted edge *
 * ------------------------------------------------------------- */
/* 4 rows x 2 columns in row-major order (8 characters), matching the
 * (h, w) = (4, 2) passed to selCreateFromString().  'x' is a hit,
 * ' ' is a don't-care and 'C' marks the origin as a don't-care.
 * pixItalicWords() uses it for a close followed by an open on the
 * hit-miss seed image. */
static const char *str_ital3 = " x"
                               "Cx"
                               "x "
                               "x ";
82
115l_ok
116pixItalicWords(PIX *pixs,
117 BOXA *boxaw,
118 PIX *pixw,
119 BOXA **pboxa,
120 l_int32 debugflag)
121{
122char opstring[32];
123l_int32 size;
124BOXA *boxa;
125PIX *pixsd, *pixm, *pixd;
126SEL *sel_ital1, *sel_ital2, *sel_ital3;
127
128 PROCNAME("pixItalicWords");
129
130 if (!pboxa)
131 return ERROR_INT("&boxa not defined", procName, 1);
132 *pboxa = NULL;
133 if (!pixs)
134 return ERROR_INT("pixs not defined", procName, 1);
135 if (boxaw && pixw)
136 return ERROR_INT("both boxaw and pixw are defined", procName, 1);
137
138 sel_ital1 = selCreateFromString(str_ital1, 13, 6, NULL);
139 sel_ital2 = selCreateFromString(str_ital2, 10, 6, NULL);
140 sel_ital3 = selCreateFromString(str_ital3, 4, 2, NULL);
141
142 /* Make the italic seed: extract with HMT; remove noise.
143 * The noise removal close/open is important to exclude
144 * situations where a small slanted line accidentally
145 * matches sel_ital1. */
146 pixsd = pixHMT(NULL, pixs, sel_ital1);
147 pixClose(pixsd, pixsd, sel_ital3);
148 pixOpen(pixsd, pixsd, sel_ital3);
149
150 /* Make the word mask. Use input boxes or mask if given. */
151 size = 0; /* init */
152 if (boxaw) {
153 pixm = pixCreateTemplate(pixs);
154 pixMaskBoxa(pixm, pixm, boxaw, L_SET_PIXELS);
155 } else if (pixw) {
156 pixm = pixClone(pixw);
157 } else {
158 pixWordMaskByDilation(pixs, NULL, &size, NULL);
159 L_INFO("dilation size = %d\n", procName, size);
160 snprintf(opstring, sizeof(opstring), "d1.5 + c%d.1", size);
161 pixm = pixMorphSequence(pixs, opstring, 0);
162 }
163
164 /* Binary reconstruction to fill in those word mask
165 * components for which there is at least one seed pixel. */
166 pixd = pixSeedfillBinary(NULL, pixsd, pixm, 8);
167 boxa = pixConnComp(pixd, NULL, 8);
168 *pboxa = boxa;
169
170 if (debugflag) {
171 /* Save results at at 2x reduction */
172 l_int32 res, upper;
173 lept_mkdir("lept/ital");
174 BOXA *boxat;
175 GPLOT *gplot;
176 NUMA *na;
177 PIXA *pixa1;
178 PIX *pix1, *pix2, *pix3;
179 pixa1 = pixaCreate(0);
180 boxat = pixConnComp(pixm, NULL, 8);
181 boxaWriteDebug("/tmp/lept/ital/ital.ba", boxat);
182 pixaAddPix(pixa1, pixs, L_COPY); /* orig */
183 pixaAddPix(pixa1, pixsd, L_COPY); /* seed */
184 pix1 = pixConvertTo32(pixm);
185 pixRenderBoxaArb(pix1, boxat, 3, 255, 0, 0);
186 pixaAddPix(pixa1, pix1, L_INSERT); /* mask + outline */
187 pixaAddPix(pixa1, pixd, L_COPY); /* ital mask */
188 pix1 = pixConvertTo32(pixs);
189 pixRenderBoxaArb(pix1, boxa, 3, 255, 0, 0);
190 pixaAddPix(pixa1, pix1, L_INSERT); /* orig + outline */
191 pix1 = pixCreateTemplate(pixs);
192 pix2 = pixSetBlackOrWhiteBoxa(pix1, boxa, L_SET_BLACK);
193 pixCopy(pix1, pixs);
194 pix3 = pixDilateBrick(NULL, pixs, 3, 3);
195 pixCombineMasked(pix1, pix3, pix2);
196 pixaAddPix(pixa1, pix1, L_INSERT); /* ital bolded */
197 pixDestroy(&pix2);
198 pixDestroy(&pix3);
199 pix2 = pixaDisplayTiledInColumns(pixa1, 1, 0.5, 20, 2);
200 pixWriteDebug("/tmp/lept/ital/ital.png", pix2, IFF_PNG);
201 pixDestroy(&pix2);
202
203 /* Assuming the image represents 6 inches of actual page width,
204 * the pixs resolution is approximately
205 * (width of pixs in pixels) / 6
206 * and the images have been saved at half this resolution. */
207 res = pixGetWidth(pixs) / 12;
208 L_INFO("resolution = %d\n", procName, res);
210 pixaConvertToPdf(pixa1, res, 1.0, L_FLATE_ENCODE, 75, "Italic Finder",
211 "/tmp/lept/ital/ital.pdf");
213 pixaDestroy(&pixa1);
214 boxaDestroy(&boxat);
215
216 /* Plot histogram of horizontal white run sizes. A small
217 * initial vertical dilation removes most runs that are neither
218 * inter-character nor inter-word. The larger first peak is
219 * from inter-character runs, and the smaller second peak is
220 * from inter-word runs. */
221 pix1 = pixDilateBrick(NULL, pixs, 1, 15);
222 upper = L_MAX(30, 3 * size);
223 na = pixRunHistogramMorph(pix1, L_RUN_OFF, L_HORIZ, upper);
224 pixDestroy(&pix1);
225 gplot = gplotCreate("/tmp/lept/ital/runhisto", GPLOT_PNG,
226 "Histogram of horizontal runs of white pixels, vs length",
227 "run length", "number of runs");
228 gplotAddPlot(gplot, NULL, na, GPLOT_LINES, "plot1");
229 gplotMakeOutput(gplot);
230 gplotDestroy(&gplot);
231 numaDestroy(&na);
232 }
233
234 selDestroy(&sel_ital1);
235 selDestroy(&sel_ital2);
236 selDestroy(&sel_ital3);
237 pixDestroy(&pixsd);
238 pixDestroy(&pixm);
239 pixDestroy(&pixd);
240 return 0;
241}
l_ok boxaWriteDebug(const char *filename, BOXA *boxa)
boxaWriteDebug()
Definition: boxbasic.c:2245
void boxaDestroy(BOXA **pboxa)
boxaDestroy()
Definition: boxbasic.c:583
PIX * pixMaskBoxa(PIX *pixd, PIX *pixs, BOXA *boxa, l_int32 op)
pixMaskBoxa()
Definition: boxfunc3.c:151
PIX * pixSetBlackOrWhiteBoxa(PIX *pixs, BOXA *boxa, l_int32 op)
pixSetBlackOrWhiteBoxa()
Definition: boxfunc3.c:286
BOXA * pixConnComp(PIX *pixs, PIXA **ppixa, l_int32 connectivity)
pixConnComp()
Definition: conncomp.c:151
l_ok gplotAddPlot(GPLOT *gplot, NUMA *nax, NUMA *nay, l_int32 plotstyle, const char *plotlabel)
gplotAddPlot()
Definition: gplot.c:320
l_ok gplotMakeOutput(GPLOT *gplot)
gplotMakeOutput()
Definition: gplot.c:466
GPLOT * gplotCreate(const char *rootname, l_int32 outformat, const char *title, const char *xlabel, const char *ylabel)
gplotCreate()
Definition: gplot.c:187
void gplotDestroy(GPLOT **pgplot)
gplotDestroy()
Definition: gplot.c:255
l_ok pixRenderBoxaArb(PIX *pix, BOXA *boxa, l_int32 width, l_uint8 rval, l_uint8 gval, l_uint8 bval)
pixRenderBoxaArb()
Definition: graphics.c:1772
@ L_FLATE_ENCODE
Definition: imageio.h:161
PIX * pixDilateBrick(PIX *pixd, PIX *pixs, l_int32 hsize, l_int32 vsize)
pixDilateBrick()
Definition: morph.c:688
PIX * pixOpen(PIX *pixd, PIX *pixs, SEL *sel)
pixOpen()
Definition: morph.c:426
PIX * pixClose(PIX *pixd, PIX *pixs, SEL *sel)
pixClose()
Definition: morph.c:473
PIX * pixHMT(PIX *pixd, PIX *pixs, SEL *sel)
pixHMT()
Definition: morph.c:342
NUMA * pixRunHistogramMorph(PIX *pixs, l_int32 runtype, l_int32 direction, l_int32 maxsize)
pixRunHistogramMorph()
Definition: morphapp.c:1106
PIX * pixMorphSequence(PIX *pixs, const char *sequence, l_int32 dispsep)
pixMorphSequence()
Definition: morphseq.c:137
void numaDestroy(NUMA **pna)
numaDestroy()
Definition: numabasic.c:366
l_ok pixaConvertToPdf(PIXA *pixa, l_int32 res, l_float32 scalefactor, l_int32 type, l_int32 quality, const char *title, const char *fileout)
pixaConvertToPdf()
Definition: pdfio1.c:790
void l_pdfSetDateAndVersion(l_int32 flag)
l_pdfSetDateAndVersion()
Definition: pdfio2.c:2659
void pixDestroy(PIX **ppix)
pixDestroy()
Definition: pix1.c:621
PIX * pixClone(PIX *pixs)
pixClone()
Definition: pix1.c:593
PIX * pixCreateTemplate(const PIX *pixs)
pixCreateTemplate()
Definition: pix1.c:383
PIX * pixCopy(PIX *pixd, const PIX *pixs)
pixCopy()
Definition: pix1.c:705
l_ok pixCombineMasked(PIX *pixd, PIX *pixs, PIX *pixm)
pixCombineMasked()
Definition: pix3.c:382
@ L_SET_PIXELS
Definition: pix.h:772
@ L_COPY
Definition: pix.h:712
@ L_INSERT
Definition: pix.h:711
@ L_SET_BLACK
Definition: pix.h:907
l_ok pixaAddPix(PIXA *pixa, PIX *pix, l_int32 copyflag)
pixaAddPix()
Definition: pixabasic.c:506
void pixaDestroy(PIXA **ppixa)
pixaDestroy()
Definition: pixabasic.c:412
PIXA * pixaCreate(l_int32 n)
pixaCreate()
Definition: pixabasic.c:167
PIX * pixaDisplayTiledInColumns(PIXA *pixas, l_int32 nx, l_float32 scalefactor, l_int32 spacing, l_int32 border)
pixaDisplayTiledInColumns()
Definition: pixafunc2.c:930
PIX * pixConvertTo32(PIX *pixs)
pixConvertTo32()
Definition: pixconv.c:3332
PIX * pixSeedfillBinary(PIX *pixd, PIX *pixs, PIX *pixm, l_int32 connectivity)
pixSeedfillBinary()
Definition: seedfill.c:247
void selDestroy(SEL **psel)
selDestroy()
Definition: sel1.c:340
SEL * selCreateFromString(const char *text, l_int32 h, l_int32 w, const char *name)
selCreateFromString()
Definition: sel1.c:1607
Definition: pix.h:492
Definition: gplot.h:77
Definition: array.h:71
Definition: pix.h:139
Definition: pix.h:456
l_int32 lept_mkdir(const char *subdir)
lept_mkdir()
Definition: utils2.c:2218