Engauge Digitizer 2
Loading...
Searching...
No Matches
GridClassifier.cpp
1/******************************************************************************************************
2 * (C) 2014 markummitchell@github.com. This file is part of Engauge Digitizer, which is released *
3 * under GNU General Public License version 2 (GPLv2) or (at your option) any later version. See file *
4 * LICENSE or go to gnu.org/licenses for details. Distribution requires prior written permission. *
5 ******************************************************************************************************/
6
7#include "ColorFilter.h"
8#include "Correlation.h"
9#include "DocumentModelCoords.h"
10#include "EngaugeAssert.h"
11#include "GridClassifier.h"
12#include <iostream>
13#include "Logger.h"
14#include <QDebug>
15#include <QFile>
16#include <QImage>
17#include "QtToString.h"
18#include "Transformation.h"
19
20int GridClassifier::NUM_PIXELS_PER_HISTOGRAM_BINS = 1;
21double GridClassifier::PEAK_HALF_WIDTH = 4;
22int GridClassifier::MIN_STEP_PIXELS = 4 * GridClassifier::PEAK_HALF_WIDTH; // Step includes down ramp, flat part, up ramp
23const QString GNUPLOT_DELIMITER ("\t");
24
25// We set up the picket fence with binStart arbitrarily set close to zero. Peak is
26// not exactly at zero since we want to include the left side of the first peak.
27int GridClassifier::BIN_START_UNSHIFTED = GridClassifier::PEAK_HALF_WIDTH;
28
29using namespace std;
30
34
35int GridClassifier::binFromCoordinate (double coord,
36 double coordMin,
37 double coordMax) const
38{
39 ENGAUGE_ASSERT (coordMin < coordMax);
40 ENGAUGE_ASSERT (coordMin <= coord);
41 ENGAUGE_ASSERT (coord <= coordMax);
42
43 int bin = 0.5 + (m_numHistogramBins - 1.0) * (coord - coordMin) / (coordMax - coordMin);
44
45 return bin;
46}
47
48void GridClassifier::classify (bool isGnuplot,
49 const QPixmap &originalPixmap,
50 const Transformation &transformation,
51 int &countX,
52 double &startX,
53 double &stepX,
54 int &countY,
55 double &startY,
56 double &stepY)
57{
58 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::classify";
59
60 QImage image = originalPixmap.toImage ();
61
62 m_numHistogramBins = image.width() / NUM_PIXELS_PER_HISTOGRAM_BINS;
63
64 double xMin, xMax, yMin, yMax;
65 double binStartX, binStepX, binStartY, binStepY;
66
67 m_binsX = new double [m_numHistogramBins];
68 m_binsY = new double [m_numHistogramBins];
69
70 computeGraphCoordinateLimits (image,
71 transformation,
72 xMin,
73 xMax,
74 yMin,
75 yMax);
76 initializeHistogramBins ();
77 populateHistogramBins (image,
78 transformation,
79 xMin,
80 xMax,
81 yMin,
82 yMax);
83 searchStartStepSpace (isGnuplot,
84 m_binsX,
85 "x",
86 xMin,
87 xMax,
88 startX,
89 stepX,
90 binStartX,
91 binStepX);
92 searchStartStepSpace (isGnuplot,
93 m_binsY,
94 "y",
95 yMin,
96 yMax,
97 startY,
98 stepY,
99 binStartY,
100 binStepY);
101 searchCountSpace (m_binsX,
102 binStartX,
103 binStepX,
104 countX);
105 searchCountSpace (m_binsY,
106 binStartY,
107 binStepY,
108 countY);
109
110 delete m_binsX;
111 delete m_binsY;
112}
113
114void GridClassifier::computeGraphCoordinateLimits (const QImage &image,
115 const Transformation &transformation,
116 double &xMin,
117 double &xMax,
118 double &yMin,
119 double &yMax)
120{
121 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::computeGraphCoordinateLimits";
122
123 // Project every pixel onto the x axis, and onto the y axis. One set of histogram bins will be
124 // set up along each of the axes. The range of bins will encompass every pixel in the image, and no more.
125
126 QPointF posGraphTL, posGraphTR, posGraphBL, posGraphBR;
127 transformation.transformScreenToRawGraph (QPointF (0, 0) , posGraphTL);
128 transformation.transformScreenToRawGraph (QPointF (image.width(), 0) , posGraphTR);
129 transformation.transformScreenToRawGraph (QPointF (0, image.height()) , posGraphBL);
130 transformation.transformScreenToRawGraph (QPointF (image.width(), image.height()), posGraphBR);
131
132 // Compute x and y ranges for setting up the histogram bins
133 if (transformation.modelCoords().coordsType() == COORDS_TYPE_CARTESIAN) {
134
135 // For affine cartesian coordinates, we only need to look at the screen corners
136 xMin = qMin (qMin (qMin (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
137 xMax = qMax (qMax (qMax (posGraphTL.x(), posGraphTR.x()), posGraphBL.x()), posGraphBR.x());
138 yMin = qMin (qMin (qMin (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
139 yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
140
141 } else {
142
143 // For affine polar coordinates, easiest approach is to assume the full circle
144 xMin = 0.0;
145 xMax = transformation.modelCoords().thetaPeriod();
146 yMin = transformation.modelCoords().originRadius();
147 yMax = qMax (qMax (qMax (posGraphTL.y(), posGraphTR.y()), posGraphBL.y()), posGraphBR.y());
148
149 }
150
151 ENGAUGE_ASSERT (xMin < xMax);
152 ENGAUGE_ASSERT (yMin < yMax);
153}
154
155double GridClassifier::coordinateFromBin (int bin,
156 double coordMin,
157 double coordMax) const
158{
159 ENGAUGE_ASSERT (bin < m_numHistogramBins);
160 ENGAUGE_ASSERT (coordMin < coordMax);
161
162 return coordMin + (coordMax - coordMin) * (double) bin / ((double) m_numHistogramBins - 1.0);
163}
164
165void GridClassifier::copyVectorToVector (const double from [],
166 double to []) const
167{
168 for (int bin = 0; bin < m_numHistogramBins; bin++) {
169 to [bin] = from [bin];
170 }
171}
172
173void GridClassifier::dumpGnuplotCoordinate (const QString &coordinateLabel,
174 double corr,
175 const double *bins,
176 double coordinateMin,
177 double coordinateMax,
178 int binStart,
179 int binStep) const
180{
181 QString filename = QString ("gridclassifier_%1_corr%2_startMax%3_stepMax%4.gnuplot")
182 .arg (coordinateLabel)
183 .arg (corr, 8, 'f', 3, '0')
184 .arg (binStart)
185 .arg (binStep);
186
187 cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
188
189 QFile fileDump (filename);
190 fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
191 QTextStream strDump (&fileDump);
192
193 int bin;
194
195 // For consistent scaling, get the max bin count
196 int binCountMax = 0;
197 for (bin = 0; bin < m_numHistogramBins; bin++) {
198 if (bins [bin] > binCountMax) {
199 binCountMax = qMax ((double) binCountMax,
200 bins [bin]);
201 }
202 }
203
204 // Get picket fence
205 double *picketFence = new double [m_numHistogramBins];
206 loadPicketFence (picketFence,
207 binStart,
208 binStep,
209 0,
210 false);
211
212 // Header
213 strDump << "bin"
214 << GNUPLOT_DELIMITER << "coordinate"
215 << GNUPLOT_DELIMITER << "binCount"
216 << GNUPLOT_DELIMITER << "startStep"
217 << GNUPLOT_DELIMITER << "picketFence" << "\n";
218
219 // Data, with one row per coordinate value
220 for (bin = 0; bin < m_numHistogramBins; bin++) {
221
222 double coordinate = coordinateFromBin (bin,
223 coordinateMin,
224 coordinateMax);
225 double startStepValue (((bin - binStart) % binStep == 0) ? 1 : 0);
226 strDump << bin
227 << GNUPLOT_DELIMITER << coordinate
228 << GNUPLOT_DELIMITER << bins [bin]
229 << GNUPLOT_DELIMITER << binCountMax * startStepValue
230 << GNUPLOT_DELIMITER << binCountMax * picketFence [bin] << "\n";
231 }
232
233 delete [] picketFence;
234}
235
236void GridClassifier::dumpGnuplotCorrelations (const QString &coordinateLabel,
237 double valueMin,
238 double valueMax,
239 const double signalA [],
240 const double signalB [],
241 const double correlations [])
242{
243 QString filename = QString ("gridclassifier_%1_correlations.gnuplot")
244 .arg (coordinateLabel);
245
246 cout << "Writing gnuplot file: " << filename.toLatin1().data() << "\n";
247
248 QFile fileDump (filename);
249 fileDump.open (QIODevice::WriteOnly | QIODevice::Text);
250 QTextStream strDump (&fileDump);
251
252 int bin;
253
254 // Compute max values so curves can be normalized to the same heights
255 double signalAMax = 1, signalBMax = 1, correlationsMax = 1;
256 for (bin = 0; bin < m_numHistogramBins; bin++) {
257 if (bin == 0 || signalA [bin] > signalAMax) {
258 signalAMax = signalA [bin];
259 }
260 if (bin == 0 || signalB [bin] > signalBMax) {
261 signalBMax = signalB [bin];
262 }
263 if (bin == 0 || correlations [bin] > correlationsMax) {
264 correlationsMax = correlations [bin];
265 }
266 }
267
268 // Output normalized curves
269 for (int bin = 0; bin < m_numHistogramBins; bin++) {
270
271 strDump << coordinateFromBin (bin,
272 valueMin,
273 valueMax)
274 << GNUPLOT_DELIMITER << signalA [bin] / signalAMax
275 << GNUPLOT_DELIMITER << signalB [bin] / signalBMax
276 << GNUPLOT_DELIMITER << correlations [bin] / correlationsMax << "\n";
277 }
278}
279
280void GridClassifier::initializeHistogramBins ()
281{
282 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::initializeHistogramBins";
283
284 for (int bin = 0; bin < m_numHistogramBins; bin++) {
285 m_binsX [bin] = 0;
286 m_binsY [bin] = 0;
287 }
288}
289
290void GridClassifier::loadPicketFence (double picketFence [],
291 int binStart,
292 int binStep,
293 int count,
294 bool isCount) const
295{
296 const double PEAK_HEIGHT = 1.0; // Arbitrary height, since normalization will counteract any change to this value
297
298 // Count argument is optional. Note that binStart already excludes PEAK_HALF_WIDTH bins at start,
299 // and we also exclude PEAK_HALF_WIDTH bins at the end
300 ENGAUGE_ASSERT (binStart >= PEAK_HALF_WIDTH);
301 if (!isCount) {
302 count = 1 + (m_numHistogramBins - binStart - PEAK_HALF_WIDTH) / binStep;
303 }
304
305 // Bins that we need to worry about
306 int binStartMinusHalfWidth = binStart - PEAK_HALF_WIDTH;
307 int binStopPlusHalfWidth = (binStart + (count - 1) * binStep) + PEAK_HALF_WIDTH;
308
309 // To normalize, we compute the area under the picket fence. Constraint is
310 // areaUnnormalized + NUM_HISTOGRAM_BINS * normalizationOffset = 0
311 double areaUnnormalized = count * PEAK_HEIGHT * PEAK_HALF_WIDTH;
312 double normalizationOffset = -1.0 * areaUnnormalized / m_numHistogramBins;
313
314 for (int bin = 0; bin < m_numHistogramBins; bin++) {
315
316 // Outside of the peaks, bin is small negative so correlation with unit function is zero. In other
317 // words, the function is normalized
318 picketFence [bin] = normalizationOffset;
319
320 if ((binStartMinusHalfWidth <= bin) &&
321 (bin <= binStopPlusHalfWidth)) {
322
323 // Closest peak
324 int ordinalClosestPeak = (int) ((bin - binStart + binStep / 2) / binStep);
325 int binClosestPeak = binStart + ordinalClosestPeak * binStep;
326
327 // Distance from closest peak is used to define an isosceles triangle
328 int distanceToClosestPeak = qAbs (bin - binClosestPeak);
329
330 if (distanceToClosestPeak < PEAK_HALF_WIDTH) {
331
332 // Map 0 to PEAK_HALF_WIDTH to 1 to 0
333 picketFence [bin] = 1.0 - (double) distanceToClosestPeak / PEAK_HALF_WIDTH + normalizationOffset;
334
335 }
336 }
337 }
338}
339
340void GridClassifier::populateHistogramBins (const QImage &image,
341 const Transformation &transformation,
342 double xMin,
343 double xMax,
344 double yMin,
345 double yMax)
346{
347 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::populateHistogramBins";
348
349 ColorFilter filter;
350 QRgb rgbBackground = filter.marginColor (&image);
351
352 for (int x = 0; x < image.width(); x++) {
353 for (int y = 0; y < image.height(); y++) {
354
355 QColor pixel = image.pixel (x, y);
356
357 // Skip pixels with background color
358 if (!filter.colorCompare (rgbBackground,
359 pixel.rgb ())) {
360
361 // Add this pixel to histograms
362 QPointF posGraph;
363 transformation.transformScreenToRawGraph (QPointF (x, y), posGraph);
364
365 if (transformation.modelCoords().coordsType() == COORDS_TYPE_POLAR) {
366
367 // If out of the 0 to period range, the theta value must shifted by the period to get into that range
368 while (posGraph.x() < xMin) {
369 posGraph.setX (posGraph.x() + transformation.modelCoords().thetaPeriod());
370 }
371 while (posGraph.x() > xMax) {
372 posGraph.setX (posGraph.x() - transformation.modelCoords().thetaPeriod());
373 }
374 }
375
376 int binX = binFromCoordinate (posGraph.x(), xMin, xMax);
377 int binY = binFromCoordinate (posGraph.y(), yMin, yMax);
378
379 ENGAUGE_ASSERT (0 <= binX);
380 ENGAUGE_ASSERT (0 <= binY);
381 ENGAUGE_ASSERT (binX < m_numHistogramBins);
382 ENGAUGE_ASSERT (binY < m_numHistogramBins);
383
384 // Roundoff error in log scaling may let bin go just outside legal range
385 binX = qMin (binX, m_numHistogramBins - 1);
386 binY = qMin (binY, m_numHistogramBins - 1);
387
388 ++m_binsX [binX];
389 ++m_binsY [binY];
390 }
391 }
392 }
393}
394
395void GridClassifier::searchCountSpace (double bins [],
396 double binStart,
397 double binStep,
398 int &countMax)
399{
400 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchCountSpace"
401 << " start=" << binStart
402 << " step=" << binStep;
403
404 // Loop though the space of possible counts
405 Correlation correlation (m_numHistogramBins);
406 double *picketFence = new double [m_numHistogramBins];
407 double corr, corrMax;
408 bool isFirst = true;
409 int countStop = 1 + (m_numHistogramBins - binStart) / binStep;
410 for (int count = 2; count <= countStop; count++) {
411
412 loadPicketFence (picketFence,
413 binStart,
414 binStep,
415 count,
416 true);
417
418 correlation.correlateWithoutShift (m_numHistogramBins,
419 bins,
420 picketFence,
421 corr);
422 if (isFirst || (corr > corrMax)) {
423 countMax = count;
424 corrMax = corr;
425 }
426
427 isFirst = false;
428 }
429
430 delete picketFence;
431}
432
433void GridClassifier::searchStartStepSpace (bool isGnuplot,
434 double bins [],
435 const QString &coordinateLabel,
436 double valueMin,
437 double valueMax,
438 double &start,
439 double &step,
440 double &binStartMax,
441 double &binStepMax)
442{
443 LOG4CPP_INFO_S ((*mainCat)) << "GridClassifier::searchStartStepSpace";
444
445 // Correlations are tracked for logging
446 double *signalA = new double [m_numHistogramBins];
447 double *signalB = new double [m_numHistogramBins];
448 double *correlations = new double [m_numHistogramBins];
449 double *correlationsMax = new double [m_numHistogramBins];
450
451 // Loop though the space of possible gridlines using the independent variables (start,step).
452 Correlation correlation (m_numHistogramBins);
453 double *picketFence = new double [m_numHistogramBins];
454 int binStart;
455 double corr = 0, corrMax = 0;
456 bool isFirst = true;
457
458 // We do not explicitly search(=loop) through binStart here, since Correlation::correlateWithShift will take
459 // care of that for us
460
461 // Step search starts out small, and stops at value that gives count substantially greater than 2. Freakishly small
462 // images need to have MIN_STEP_PIXELS overridden so the loop iterates at least once
463 for (int binStep = qMin (MIN_STEP_PIXELS, m_numHistogramBins / 8); binStep < m_numHistogramBins / 4; binStep++) {
464
465 loadPicketFence (picketFence,
466 BIN_START_UNSHIFTED,
467 binStep,
468 PEAK_HALF_WIDTH,
469 false);
470
471 correlation.correlateWithShift (m_numHistogramBins,
472 bins,
473 picketFence,
474 binStart,
475 corr,
476 correlations);
477 if (isFirst || (corr > corrMax)) {
478
479 int binStartMaxNext = binStart + BIN_START_UNSHIFTED + 1; // Compensate for the shift performed inside loadPicketFence
480
481 // Make sure binStartMax never goes out of bounds
482 if (binStartMaxNext < m_numHistogramBins) {
483
484 binStartMax = binStartMaxNext;
485 binStepMax = binStep;
486 corrMax = corr;
487 copyVectorToVector (bins, signalA);
488 copyVectorToVector (picketFence, signalB);
489 copyVectorToVector (correlations, correlationsMax);
490
491 // Output a gnuplot file. We should see the correlation values consistently increasing
492 if (isGnuplot) {
493
494 dumpGnuplotCoordinate(coordinateLabel,
495 corr,
496 bins,
497 valueMin,
498 valueMax,
499 binStart,
500 binStep);
501 }
502 }
503 }
504
505 isFirst = false;
506 }
507
508 // Convert from bins back to graph coordinates
509 start = coordinateFromBin (binStartMax,
510 valueMin,
511 valueMax);
512 double next = coordinateFromBin (binStartMax + binStepMax,
513 valueMin,
514 valueMax);
515 step = next - start;
516
517 if (isGnuplot) {
518 dumpGnuplotCorrelations (coordinateLabel,
519 valueMin,
520 valueMax,
521 signalA,
522 signalB,
523 correlationsMax);
524 }
525
526 delete signalA;
527 delete signalB;
528 delete correlations;
529 delete correlationsMax;
530 delete picketFence;
531}
Class for filtering image to remove unimportant information.
Definition ColorFilter.h:19
QRgb marginColor(const QImage *image) const
Identify the margin color of the image, which is defined as the most common color in the four margins...
bool colorCompare(QRgb rgb1, QRgb rgb2) const
See if the two color values are close enough to be considered to be the same.
Fast cross correlation between two functions.
Definition Correlation.h:15
double thetaPeriod() const
Return the period of the theta value for polar coordinates, consistent with CoordThetaUnits.
CoordsType coordsType() const
Get method for coordinates type.
double originRadius() const
Get method for origin radius in polar mode.
GridClassifier()
Single constructor.
void classify(bool isGnuplot, const QPixmap &originalPixmap, const Transformation &transformation, int &countX, double &startX, double &stepX, int &countY, double &startY, double &stepY)
Classify the specified image, and return the most probably x and y grid settings.
Affine transformation between screen and graph coordinates, based on digitized axis points.
void transformScreenToRawGraph(const QPointF &coordScreen, QPointF &coordGraph) const
Transform from cartesian pixel screen coordinates to cartesian/polar graph coordinates.
DocumentModelCoords modelCoords() const
Get method for DocumentModelCoords.