The Battle for Wesnoth  1.13.4+dev
 All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Modules Pages
xbrz.cpp
Go to the documentation of this file.
1 /*
2  Copyright (C) 2014 - 2016 by Chris Beck <[email protected]>
3  Part of the Battle for Wesnoth Project http://www.wesnoth.org/
4  This program is free software; you can redistribute it and/or modify
5  it under the terms of the GNU General Public License as published by
6  the Free Software Foundation; either version 2 of the License, or
7  (at your option) any later version.
8  This program is distributed in the hope that it will be useful,
9  but WITHOUT ANY WARRANTY.
10  See the COPYING file for more details.
11 
12  This is a derivative work of the xBRZ component of the HqMAME project
13  by Zenju. The original licensing statement follows, indented with //
14  The primary changes are syntactic, to make it compile with C99+Boost,
15  and to make it handle an alpha channel in the image in a manner proper
16  for SDL.
17 
18  It is not possible to extend the MAME 'special exception' to all of
19  the Battle for Wesnoth project, however, the special exception is
20  granted for my derivative forms of this work.
21 */
22 
23 // ****************************************************************************
24 // * This file is part of the HqMAME project. It is distributed under *
25 // * GNU General Public License: http://www.gnu.org/licenses/gpl.html *
26 // * Copyright (C) Zenju (zenju AT gmx DOT de) - All Rights Reserved *
27 // * *
28 // * Additionally and as a special exception, the author gives permission *
29 // * to link the code of this program with the MAME library (or with modified *
30 // * versions of MAME that use the same license as MAME), and distribute *
31 // * linked combinations including the two. You must obey the GNU General *
32 // * Public License in all respects for all of the code used other than MAME. *
33 // * If you modify this file, you may extend this exception to your version *
34 // * of the file, but you are not obligated to do so. If you do not wish to *
35 // * do so, delete this exception statement from your version. *
36 // ****************************************************************************
37 
38 #include "xbrz.hpp"
39 #include "config.hpp"
40 #include <cassert>
41 #include <cmath>
42 #include <algorithm>
43 
44 #include "utils/functional.hpp"
45 
46 #if defined(__GNUC__) && !defined(__clang__) && !defined(__WIN32__) // We only want this for gcc, not clang or tdm-gcc
47 #if __GNUC__ < 4 || (__GNUC__ == 4 && __GNUC_MINOR__ <= 8 )
48 #pragma GCC diagnostic ignored "-Wmaybe-uninitialized" //Suppress uninitialized variables warnings on GCC <= 4.8.x
49 #endif
50 #endif
51 
52 namespace
53 {
//Returns byte number N of a packed 32-bit value; N = 0 selects the least significant byte.
template <uint32_t N>
inline unsigned char getByte(uint32_t val)
{
    const uint32_t shifted = val >> (8 * N);
    return static_cast<unsigned char>(shifted & 0xff);
}

//Colour channel accessors: red is byte 2, green is byte 1 and blue is byte 0 of the packed pixel.
inline unsigned char getRed(uint32_t val)
{
    return getByte<2>(val);
}

inline unsigned char getGreen(uint32_t val)
{
    return getByte<1>(val);
}

inline unsigned char getBlue(uint32_t val)
{
    return getByte<0>(val);
}
60 
//Absolute value for any type that supports "< 0" and unary minus.
//Only meaningful for signed types (the matching static_assert stays disabled, as before).
template <class T>
inline T abs(T value)
{
    //static_assert(std::is_signed<T>::value, "");
    if (value < 0)
        return -value;
    return value;
}
67 
//Channel masks of a packed 32-bit pixel: alpha in the top byte, then red, green and blue.
const uint32_t redMask   = 0xff0000;
const uint32_t greenMask = 0x00ff00;
const uint32_t blueMask  = 0x0000ff;
const uint32_t alphaMask = 0xff000000;

//Blend colour "col" over "dst" with opacity N / M.
//
// dst: pixel that is updated in place
// col: pixel blended on top of it
//
//Special cases for sprites with an alpha channel (xBRZ itself assumes there is none):
// - a fully transparent "col" (alpha == 0) leaves "dst" untouched
// - a fully transparent "dst" (alpha == 0) is replaced by "col" unchanged
//Otherwise all four channels, alpha included, are interpolated linearly with weights
//N / M for "col" and (M - N) / M for "dst", rounding down per channel.
template <unsigned int N, unsigned int M> inline
void alphaBlend(uint32_t& dst, uint32_t col)
{
    //Compile-time guards: the per-channel arithmetic below is done in 32 bit without shifting the
    //channel down first, which is only safe while both weights stay below 256.
    static_assert(N < 256, "possible overflow of (col & redMask) * N");
    static_assert(M < 256, "possible overflow of (col & redMask) * N + (dst & redMask) * (M - N)");
    static_assert(0 < N && N < M, "blend weight N / M must lie strictly between 0 and 1");

    const uint32_t col_alpha = col >> 24; // & with alphaMask is unnecessary
    if (!col_alpha)
        return;

    const uint32_t dst_alpha = dst >> 24;
    if (!dst_alpha)
    {
        dst = col;
        return;
    }

    //TODO: Figure out if there's some way to combine the multiplicative approach with the "averaged alpha", and to feed back the
    // alpha into the colors, without making it all very slow. Current approach looks okay, but shadows could probably be better;
    // also some units may be getting 'black outlines' because their black pixels with 0 alpha (background) are getting
    // averaged with their foreground.

    dst = (redMask   & ((col & redMask  ) * N + (dst & redMask  ) * (M - N)) / M) | //this works because 8 upper bits are free
          (greenMask & ((col & greenMask) * N + (dst & greenMask) * (M - N)) / M) |
          (blueMask  & ((col & blueMask ) * N + (dst & blueMask ) * (M - N)) / M) |
          (alphaMask & (((col_alpha * N + dst_alpha * (M - N)) / M) << 24)); //alpha is blended shifted down, then shifted back up, to avoid overflow
}
143 
144 
145 //inline
146 //double fastSqrt(double n)
147 //{
148 // __asm //speeds up xBRZ by about 9% compared to std::sqrt
149 // {
150 // fld n
151 // fsqrt
152 // }
153 //}
154 //
155 
156 #if 0
157 inline
158 uint32_t alphaBlend2(uint32_t pix1, uint32_t pix2, double alpha)
159 {
160  return (redMask & static_cast<uint32_t>((pix1 & redMask ) * alpha + (pix2 & redMask ) * (1 - alpha))) |
161  (greenMask & static_cast<uint32_t>((pix1 & greenMask) * alpha + (pix2 & greenMask) * (1 - alpha))) |
162  (blueMask & static_cast<uint32_t>((pix1 & blueMask ) * alpha + (pix2 & blueMask ) * (1 - alpha)));
163 }
164 #endif
165 
//Offsets a pixel pointer by a signed number of BYTES (not pixels), e.g. to step by a row pitch.
uint32_t* byteAdvance(uint32_t* ptr, int bytes)
{
    char* const rawBytes = reinterpret_cast<char*>(ptr);
    return reinterpret_cast<uint32_t*>(rawBytes + bytes);
}

//Same as above for read-only pixel data.
const uint32_t* byteAdvance(const uint32_t* ptr, int bytes)
{
    const char* const rawBytes = reinterpret_cast<const char*>(ptr);
    return reinterpret_cast<const uint32_t*>(rawBytes + bytes);
}
168 
169 
//Fills a rectangle of blockWidth x blockHeight pixels, starting at "trg", with the colour "col".
//"pitch" is the distance between the starts of two consecutive rows, measured in BYTES.
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int blockWidth, int blockHeight)
{
    uint32_t* row = trg;
    for (int rowsLeft = blockHeight; rowsLeft > 0; --rowsLeft)
    {
        int column = 0;
        while (column < blockWidth)
            row[column++] = col;

        //step to the next row; the stride is a byte count, hence the detour via char*
        row = reinterpret_cast<uint32_t*>(reinterpret_cast<char*>(row) + pitch);
    }
}

//Convenience overload for a square block of n x n pixels.
inline
void fillBlock(uint32_t* trg, int pitch, uint32_t col, int n)
{
    fillBlock(trg, pitch, col, n, n);
}
184 
185 
//FORCE_INLINE: strongest "please inline" spelling the compiler offers;
//falls back to plain "inline" on compilers that are neither MSVC nor GCC-compatible.
#if defined(_MSC_VER)
    #define FORCE_INLINE __forceinline
#elif defined(__GNUC__)
    #define FORCE_INLINE __attribute__((always_inline)) inline
#else
    #define FORCE_INLINE inline
#endif
193 
194 
//Clockwise rotation in quarter-turn steps.
//The enumerators are consecutive, so "rotDeg - 1" names the previous quarter turn (used by MatrixRotation).
enum RotationDegree
{
    ROT_0   = 0,
    ROT_90  = 1,
    ROT_180 = 2,
    ROT_270 = 3
};

//Compile-time lookup of the matrix coordinates a cell had before rotation.
//(I, J) = (row, col) indices after rotation, N = size of the (square) matrix;
//the result is exposed as the constants I_old / J_old.
template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
struct MatrixRotation;

//Base case: without rotation the coordinates are unchanged.
template <size_t I, size_t J, size_t N>
struct MatrixRotation<ROT_0, I, J, N>
{
    static const size_t I_old = I;
    static const size_t J_old = J;
};

//General case: undo one quarter turn, then defer to the rotation that is one step smaller.
template <RotationDegree rotDeg, size_t I, size_t J, size_t N>
struct MatrixRotation
{
private:
    typedef MatrixRotation<static_cast<RotationDegree>(rotDeg - 1), I, J, N> Previous;

public:
    static const size_t I_old = N - 1 - Previous::J_old; //old coordinates before rotation!
    static const size_t J_old = Previous::I_old;
};
220 
221 
222 template <size_t N, RotationDegree rotDeg>
223 class OutputMatrix
224 {
225 public:
226  OutputMatrix(uint32_t* out, int outWidth) : //access matrix area, top-left at position "out" for image with given width
227  out_(out),
228  outWidth_(outWidth) {}
229 
230  template <size_t I, size_t J>
231  uint32_t& ref() const
232  {
233  static const size_t I_old = MatrixRotation<rotDeg, I, J, N>::I_old;
234  static const size_t J_old = MatrixRotation<rotDeg, I, J, N>::J_old;
235  return *(out_ + J_old + I_old * outWidth_);
236  }
237 
238 private:
239  uint32_t* out_;
240  const int outWidth_;
241 };
242 
243 
//Returns value * value.
template <class T>
inline T square(T value)
{
    return value * value;
}
246 
247 
248 /*
249 inline
250 void rgbtoLuv(uint32_t c, double& L, double& u, double& v)
251 {
252  //http://www.easyrgb.com/index.php?X=MATH&H=02#text2
253  double r = getRed (c) / 255.0;
254  double g = getGreen(c) / 255.0;
255  double b = getBlue (c) / 255.0;
256 
257  if ( r > 0.04045 )
258  r = std::pow(( ( r + 0.055 ) / 1.055 ) , 2.4);
259  else
260  r /= 12.92;
261  if ( g > 0.04045 )
262  g = std::pow(( ( g + 0.055 ) / 1.055 ) , 2.4);
263  else
264  g /= 12.92;
265  if ( b > 0.04045 )
266  b = std::pow(( ( b + 0.055 ) / 1.055 ) , 2.4);
267  else
268  b /= 12.92;
269 
270  r *= 100;
271  g *= 100;
272  b *= 100;
273 
274  double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
275  double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
276  double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
277  //---------------------
278  double var_U = 4 * x / ( x + 15 * y + 3 * z );
279  double var_V = 9 * y / ( x + 15 * y + 3 * z );
280  double var_Y = y / 100;
281 
282  if ( var_Y > 0.008856 ) var_Y = std::pow(var_Y , 1.0/3 );
283  else var_Y = 7.787 * var_Y + 16.0 / 116;
284 
285  const double ref_X = 95.047; //Observer= 2 (degrees), Illuminant= D65
286  const double ref_Y = 100.000;
287  const double ref_Z = 108.883;
288 
289  const double ref_U = ( 4 * ref_X ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
290  const double ref_V = ( 9 * ref_Y ) / ( ref_X + ( 15 * ref_Y ) + ( 3 * ref_Z ) );
291 
292  L = ( 116 * var_Y ) - 16;
293  u = 13 * L * ( var_U - ref_U );
294  v = 13 * L * ( var_V - ref_V );
295 }
296 */
297 
#if 0
//(Disabled) Converts a packed RGB pixel to CIE L*a*b*, writing the result to L, A and B.
// c: packed pixel; only the red, green and blue bytes are read
// L: lightness, A / B: colour-opponent components (truncated to char on output)
inline
void rgbtoLab(uint32_t c, unsigned char& L, signed char& A, signed char& B)
{
    //code: http://www.easyrgb.com/index.php?X=MATH
    //test: http://www.workwithcolor.com/color-converter-01.htm
    //------RGB to XYZ------
    double r = getRed  (c) / 255.0;
    double g = getGreen(c) / 255.0;
    double b = getBlue (c) / 255.0;

    //gamma expansion, applied to each channel separately
    //(previously all three results were assigned to "r", leaving g and b unconverted)
    r = r > 0.04045 ? std::pow(( r + 0.055 ) / 1.055, 2.4) : r / 12.92;
    g = g > 0.04045 ? std::pow(( g + 0.055 ) / 1.055, 2.4) : g / 12.92;
    b = b > 0.04045 ? std::pow(( b + 0.055 ) / 1.055, 2.4) : b / 12.92;

    r *= 100;
    g *= 100;
    b *= 100;

    double x = 0.4124564 * r + 0.3575761 * g + 0.1804375 * b;
    double y = 0.2126729 * r + 0.7151522 * g + 0.0721750 * b;
    double z = 0.0193339 * r + 0.1191920 * g + 0.9503041 * b;
    //------XYZ to Lab------
    const double refX = 95.047;  //
    const double refY = 100.000; //Observer= 2 (degrees), Illuminant= D65
    const double refZ = 108.883; //
    double var_X = x / refX;
    double var_Y = y / refY;
    double var_Z = z / refZ;

    var_X = var_X > 0.008856 ? std::pow(var_X, 1.0 / 3) : 7.787 * var_X + 4.0 / 29;
    var_Y = var_Y > 0.008856 ? std::pow(var_Y, 1.0 / 3) : 7.787 * var_Y + 4.0 / 29;
    var_Z = var_Z > 0.008856 ? std::pow(var_Z, 1.0 / 3) : 7.787 * var_Z + 4.0 / 29;

    L = static_cast<unsigned char>(116 * var_Y - 16);
    A = static_cast<  signed char>(500 * (var_X - var_Y));
    B = static_cast<  signed char>(200 * (var_Y - var_Z));
}
#endif
337 
338 #if 0
339 inline
340 double distLAB(uint32_t pix1, uint32_t pix2)
341 {
342  unsigned char L1 = 0; //[0, 100]
343  signed char a1 = 0; //[-128, 127]
344  signed char b1 = 0; //[-128, 127]
345  rgbtoLab(pix1, L1, a1, b1);
346 
347  unsigned char L2 = 0;
348  signed char a2 = 0;
349  signed char b2 = 0;
350  rgbtoLab(pix2, L2, a2, b2);
351 
352  //-----------------------------
353  //http://www.easyrgb.com/index.php?X=DELT
354 
355  //Delta E/CIE76
356  return std::sqrt(square(1.0 * L1 - L2) +
357  square(1.0 * a1 - a2) +
358  square(1.0 * b1 - b2));
359 }
360 #endif
361 
362 /*
363 inline
364 void rgbtoHsl(uint32_t c, double& h, double& s, double& l)
365 {
366  //http://www.easyrgb.com/index.php?X=MATH&H=18#text18
367  const int r = getRed (c);
368  const int g = getGreen(c);
369  const int b = getBlue (c);
370 
371  const int varMin = numeric::min(r, g, b);
372  const int varMax = numeric::max(r, g, b);
373  const int delMax = varMax - varMin;
374 
375  l = (varMax + varMin) / 2.0 / 255.0;
376 
377  if (delMax == 0) //gray, no chroma...
378  {
379  h = 0;
380  s = 0;
381  }
382  else
383  {
384  s = l < 0.5 ?
385  delMax / (1.0 * varMax + varMin) :
386  delMax / (2.0 * 255 - varMax - varMin);
387 
388  double delR = ((varMax - r) / 6.0 + delMax / 2.0) / delMax;
389  double delG = ((varMax - g) / 6.0 + delMax / 2.0) / delMax;
390  double delB = ((varMax - b) / 6.0 + delMax / 2.0) / delMax;
391 
392  if (r == varMax)
393  h = delB - delG;
394  else if (g == varMax)
395  h = 1 / 3.0 + delR - delB;
396  else if (b == varMax)
397  h = 2 / 3.0 + delG - delR;
398 
399  if (h < 0)
400  h += 1;
401  if (h > 1)
402  h -= 1;
403  }
404 }
405 
406 inline
407 double distHSL(uint32_t pix1, uint32_t pix2, double lightningWeight)
408 {
409  double h1 = 0;
410  double s1 = 0;
411  double l1 = 0;
412  rgbtoHsl(pix1, h1, s1, l1);
413  double h2 = 0;
414  double s2 = 0;
415  double l2 = 0;
416  rgbtoHsl(pix2, h2, s2, l2);
417 
418  //HSL is in cylindrical coordinates where L represents height, S radius, H angle,
419  //however we interpret the cylinder as a bi-conic solid with top/bottom radius 0, middle radius 1
420  assert(0 <= h1 && h1 <= 1);
421  assert(0 <= h2 && h2 <= 1);
422 
423  double r1 = l1 < 0.5 ?
424  l1 * 2 :
425  2 - l1 * 2;
426 
427  double x1 = r1 * s1 * std::cos(h1 * 2 * numeric::pi);
428  double y1 = r1 * s1 * std::sin(h1 * 2 * numeric::pi);
429  double z1 = l1;
430 
431  double r2 = l2 < 0.5 ?
432  l2 * 2 :
433  2 - l2 * 2;
434 
435  double x2 = r2 * s2 * std::cos(h2 * 2 * numeric::pi);
436  double y2 = r2 * s2 * std::sin(h2 * 2 * numeric::pi);
437  double z2 = l2;
438 
439  return 255 * std::sqrt(square(x1 - x2) + square(y1 - y2) + square(lightningWeight * (z1 - z2)));
440 }
441 */
442 
443 #if 0
444 inline
445 double distRGB(uint32_t pix1, uint32_t pix2)
446 {
447  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
448  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
449  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
450 
451  //euklidean RGB distance
452  return std::sqrt(square(r_diff) + square(g_diff) + square(b_diff));
453 }
454 #endif
455 
456 #if 0
457 inline
458 double distNonLinearRGB(uint32_t pix1, uint32_t pix2)
459 {
460  //non-linear rgb: http://www.compuphase.com/cmetric.htm
461  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
462  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
463  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
464 
465  const double r_avg = (static_cast<double>(getRed(pix1)) + getRed(pix2)) / 2;
466  return std::sqrt((2 + r_avg / 255) * square(r_diff) + 4 * square(g_diff) + (2 + (255 - r_avg) / 255) * square(b_diff));
467 }
468 #endif
469 
//Colour distance between two packed pixels, measured in YCbCr space.
// pix1, pix2: packed pixels; only the red, green and blue bytes are compared
// lumaWeight: factor applied to the luma (Y) difference before the Euclidean norm is taken
//Returns 0 for identical RGB values. The result is NOT divided by 255, so that its range stays
//similar to the other distance functions.
inline
double distYCbCr(uint32_t pix1, uint32_t pix2, double lumaWeight)
{
    //http://en.wikipedia.org/wiki/YCbCr#ITU-R_BT.601_conversion
    //The conversion is a matrix multiplication, i.e. linear, so the channels are subtracted first and
    //the difference is converted once. Integer subtraction is noticeably faster than double here.
    auto channelDiff = [pix1, pix2](unsigned int shift) -> int
    {
        return static_cast<int>((pix1 >> shift) & 0xff) - static_cast<int>((pix2 >> shift) & 0xff);
    };
    const int redDelta   = channelDiff(16);
    const int greenDelta = channelDiff(8);
    const int blueDelta  = channelDiff(0);

    //ITU-R BT.709 luma coefficients
    const double k_b = 0.0722;
    const double k_r = 0.2126;
    const double k_g = 1 - k_b - k_r;

    const double scale_b = 0.5 / (1 - k_b);
    const double scale_r = 0.5 / (1 - k_r);

    const double luma     = k_r * redDelta + k_g * greenDelta + k_b * blueDelta; //[!], analog YCbCr!
    const double chromaB  = scale_b * (blueDelta - luma);
    const double chromaR  = scale_r * (redDelta  - luma);
    const double weighted = lumaWeight * luma;

    return std::sqrt(weighted * weighted + chromaB * chromaB + chromaR * chromaR);
}
493 
494 #if 0
495 inline
496 double distYUV(uint32_t pix1, uint32_t pix2, double luminanceWeight)
497 {
498  //perf: it's not worthwhile to buffer the YUV-conversion, the direct code is faster by ~ 6%
499  //since RGB -> YUV conversion is essentially a matrix multiplication, we can calculate the RGB diff before the conversion (distributive property)
500  const double r_diff = static_cast<int>(getRed (pix1)) - getRed (pix2);
501  const double g_diff = static_cast<int>(getGreen(pix1)) - getGreen(pix2);
502  const double b_diff = static_cast<int>(getBlue (pix1)) - getBlue (pix2);
503 
504  //http://en.wikipedia.org/wiki/YUV#Conversion_to.2Ffrom_RGB
505  const double w_b = 0.114;
506  const double w_r = 0.299;
507  const double w_g = 1 - w_r - w_b;
508 
509  const double u_max = 0.436;
510  const double v_max = 0.615;
511 
512  const double scale_u = u_max / (1 - w_b);
513  const double scale_v = v_max / (1 - w_r);
514 
515  double y = w_r * r_diff + w_g * g_diff + w_b * b_diff;//value range: 255 * [-1, 1]
516  double u = scale_u * (b_diff - y); //value range: 255 * 2 * u_max * [-1, 1]
517  double v = scale_v * (r_diff - y); //value range: 255 * 2 * v_max * [-1, 1]
518 
519 #ifndef NDEBUG
520  const double eps = 0.5;
521 #endif
522  assert(std::abs(y) <= 255 + eps);
523  assert(std::abs(u) <= 255 * 2 * u_max + eps);
524  assert(std::abs(v) <= 255 * 2 * v_max + eps);
525 
526  return std::sqrt(square(luminanceWeight * y) + square(u) + square(v));
527 }
528 #endif
529 
530 inline
531 double colorDist(uint32_t pix1, uint32_t pix2, double luminanceWeight)
532 {
533  if (pix1 == pix2) //about 8% perf boost
534  return 0;
535 
536  //return distHSL(pix1, pix2, luminanceWeight);
537  //return distRGB(pix1, pix2);
538  //return distLAB(pix1, pix2);
539  //return distNonLinearRGB(pix1, pix2);
540  //return distYUV(pix1, pix2, luminanceWeight);
541 
542  return distYCbCr(pix1, pix2, luminanceWeight);
543 }
544 
545 
//How strongly a corner should be blended.
enum BlendType
{
    BLEND_NONE = 0,
    BLEND_NORMAL,   //a normal indication to blend
    BLEND_DOMINANT  //a strong indication to blend
    //attention: BlendType must fit into the value range of 2 bit!!!
};

//Blend decision for each of the four pixels F, G, J, K around one corner.
struct BlendResult
{
    BlendType
    /**/blend_f, blend_g,
    /**/blend_j, blend_k;

    //Every corner starts out as "no blending": preProcessCorners only assigns the corners it wants
    //to blend and returns the rest as constructed, so they must not be left indeterminate.
    BlendResult() :
        blend_f(BLEND_NONE), blend_g(BLEND_NONE),
        blend_j(BLEND_NONE), blend_k(BLEND_NONE) {}
};
562 
563 
//4 x 4 neighbourhood of source pixels used by the preprocessing step, named row by row a..p.
struct Kernel_4x4
{
    uint32_t a, b, c, d; //1st row
    uint32_t e, f, g, h; //2nd row
    uint32_t i, j, k, l; //3rd row
    uint32_t m, n, o, p; //4th row

    //does not initialise the members; the caller is expected to assign all of them
    Kernel_4x4() {}
};
574 
575 /*
576 input kernel area naming convention:
577 -----------------
578 | A | B | C | D |
579 ----|---|---|---|
580 | E | F | G | H | //evaluate the four corners between F, G, J, K
581 ----|---|---|---| //input pixel is at position F
582 | I | J | K | L |
583 ----|---|---|---|
584 | M | N | O | P |
585 -----------------
586 */
587 FORCE_INLINE //detect blend direction
588 BlendResult preProcessCorners(const Kernel_4x4& ker, const xbrz::ScalerCfg& cfg) //result: F, G, J, K corners of "GradientType"
589 {
590  BlendResult result;
591 
592  if ((ker.f == ker.g &&
593  ker.j == ker.k) ||
594  (ker.f == ker.j &&
595  ker.g == ker.k))
596  return result;
597 
598  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
599 
600  const int weight = 4;
601  double jg = dist(ker.i, ker.f) + dist(ker.f, ker.c) + dist(ker.n, ker.k) + dist(ker.k, ker.h) + weight * dist(ker.j, ker.g);
602  double fk = dist(ker.e, ker.j) + dist(ker.j, ker.o) + dist(ker.b, ker.g) + dist(ker.g, ker.l) + weight * dist(ker.f, ker.k);
603 
604  if (jg < fk) //test sample: 70% of values max(jg, fk) / min(jg, fk) are between 1.1 and 3.7 with median being 1.8
605  {
606  const bool dominantGradient = cfg.dominantDirectionThreshold * jg < fk;
607  if (ker.f != ker.g && ker.f != ker.j)
608  result.blend_f = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
609 
610  if (ker.k != ker.j && ker.k != ker.g)
611  result.blend_k = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
612  }
613  else if (fk < jg)
614  {
615  const bool dominantGradient = cfg.dominantDirectionThreshold * fk < jg;
616  if (ker.j != ker.f && ker.j != ker.k)
617  result.blend_j = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
618 
619  if (ker.g != ker.f && ker.g != ker.k)
620  result.blend_g = dominantGradient ? BLEND_DOMINANT : BLEND_NORMAL;
621  }
622  return result;
623 }
624 
//3 x 3 neighbourhood of source pixels around the pixel being scaled, named row by row a..i.
struct Kernel_3x3
{
    uint32_t a, b, c; //1st row
    uint32_t d, e, f; //2nd row
    uint32_t g, h, i; //3rd row

    //does not initialise the members; the caller is expected to assign all of them
    Kernel_3x3() {}
};
634 
635 #define DEF_GETTER(x) template <RotationDegree rotDeg> uint32_t inline get_##x(const Kernel_3x3& ker) { return ker.x; }
636 //we cannot and NEED NOT write "ker.##x" since ## concatenates preprocessor tokens but "." is not a token
640 #undef DEF_GETTER
641 
642 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_90>(const Kernel_3x3& ker) { return ker.y; }
643 /*DEF_GETTER(a, g)*/ DEF_GETTER(b, d) DEF_GETTER(c, a)
645 DEF_GETTER(g, i) DEF_GETTER(h, f) DEF_GETTER(i, c)
646 #undef DEF_GETTER
647 
648 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_180>(const Kernel_3x3& ker) { return ker.y; }
649 /*DEF_GETTER(a, i)*/ DEF_GETTER(b, h) DEF_GETTER(c, g)
650 DEF_GETTER(d, f) DEF_GETTER(e, e) DEF_GETTER(f, d)
651 DEF_GETTER(g, c) DEF_GETTER(h, b) DEF_GETTER(i, a)
652 #undef DEF_GETTER
653 
654 #define DEF_GETTER(x, y) template <> inline uint32_t get_##x<ROT_270>(const Kernel_3x3& ker) { return ker.y; }
655 /*DEF_GETTER(a, c)*/ DEF_GETTER(b, f) DEF_GETTER(c, i)
656 DEF_GETTER(d, b) DEF_GETTER(e, e) DEF_GETTER(f, h)
657 DEF_GETTER(g, a) DEF_GETTER(h, d) DEF_GETTER(i, g)
658 #undef DEF_GETTER
659 
660 //compress four blend types into a single byte
661 //inline BlendType getTopL (unsigned char b) { return static_cast<BlendType>(0x3 & b); }
662 inline BlendType getTopR (unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 2)); }
663 inline BlendType getBottomR(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 4)); }
664 inline BlendType getBottomL(unsigned char b) { return static_cast<BlendType>(0x3 & (b >> 6)); }
665 
666 inline void setTopL (unsigned char& b, BlendType bt) { b |= bt; } //buffer is assumed to be initialized before preprocessing!
667 inline void setTopR (unsigned char& b, BlendType bt) { b |= (bt << 2); }
668 inline void setBottomR(unsigned char& b, BlendType bt) { b |= (bt << 4); }
669 inline void setBottomL(unsigned char& b, BlendType bt) { b |= (bt << 6); }
670 
671 inline bool blendingNeeded(unsigned char b) { return b != 0; }
672 
673 template <RotationDegree rotDeg> inline
674 unsigned char rotateBlendInfo(unsigned char b) { return b; }
675 template <> inline unsigned char rotateBlendInfo<ROT_90 >(unsigned char b) { return ((b << 2) | (b >> 6)) & 0xff; }
676 template <> inline unsigned char rotateBlendInfo<ROT_180>(unsigned char b) { return ((b << 4) | (b >> 4)) & 0xff; }
677 template <> inline unsigned char rotateBlendInfo<ROT_270>(unsigned char b) { return ((b << 6) | (b >> 2)) & 0xff; }
678 
679 
#ifndef NDEBUG
//Debug builds only: scaleImage sets breakIntoDebugger while it processes the source pixel at
//(debugPixelX, debugPixelY), so a breakpoint can be tied to one specific pixel.
//NOTE(review): these are plain mutable globals; if scaleImage is run on several stripes
//concurrently, the write to breakIntoDebugger is unsynchronised - confirm this is acceptable.
int  debugPixelX       = -1;
int  debugPixelY       = 84;
bool breakIntoDebugger = false;
#endif
685 
686 /*
687 input kernel area naming convention:
688 -------------
689 | A | B | C |
690 ----|---|---|
691 | D | E | F | //input pixel is at position E
692 ----|---|---|
693 | G | H | I |
694 -------------
695 */
696 template <class Scaler, RotationDegree rotDeg>
697 FORCE_INLINE //perf: quite worth it!
698 void scalePixel(const Kernel_3x3& ker,
699  uint32_t* target, int trgWidth,
700  unsigned char blendInfo, //result of preprocessing all four corners of pixel "e"
701  const xbrz::ScalerCfg& cfg)
702 {
703 #define a get_a<rotDeg>(ker)
704 #define b get_b<rotDeg>(ker)
705 #define c get_c<rotDeg>(ker)
706 #define d get_d<rotDeg>(ker)
707 #define e get_e<rotDeg>(ker)
708 #define f get_f<rotDeg>(ker)
709 #define g get_g<rotDeg>(ker)
710 #define h get_h<rotDeg>(ker)
711 #define i get_i<rotDeg>(ker)
712 
713 #ifndef NDEBUG
714  (void) breakIntoDebugger;
715  //if (breakIntoDebugger)
716  // __debugbreak(); //__asm int 3;
717 #endif
718 
719  const unsigned char blend = rotateBlendInfo<rotDeg>(blendInfo);
720 
721  if (getBottomR(blend) >= BLEND_NORMAL)
722  {
723  auto eq = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_) < cfg.equalColorTolerance_; };
724 
725  auto dist = [&cfg](uint32_t col1, uint32_t col2) { return colorDist(col1, col2, cfg.luminanceWeight_); };
726 
727  const uint32_t px = dist(e, f) <= dist(e, h) ? f : h; //choose most similar color
728 
729  OutputMatrix<Scaler::scale, rotDeg> out(target, trgWidth);
730 
731  bool doLineBlend = true;
732  {
733  if (getBottomR(blend) >= BLEND_DOMINANT)
734  doLineBlend = true;
735 
736  //make sure there is no second blending in an adjacent rotation for this pixel: handles insular pixels, mario eyes
737  else if (getTopR(blend) != BLEND_NONE && !eq(e, g)) //but support double-blending for 90 (degrees) corners
738  doLineBlend = false;
739  else if (getBottomL(blend) != BLEND_NONE && !eq(e, c))
740  doLineBlend = false;
741 
742  //no full blending for L-shapes; blend corner only (handles "mario mushroom eyes")
743  else if (eq(g, h) && eq(h , i) && eq(i, f) && eq(f, c) && !eq(e, i))
744  doLineBlend = false;
745 
746  else doLineBlend = true;
747  }
748 
749  if (doLineBlend)
750  {
751  const double fg = dist(f, g); //test sample: 70% of values max(fg, hc) / min(fg, hc) are between 1.1 and 3.7 with median being 1.9
752  const double hc = dist(h, c); //
753 
754  const bool haveShallowLine = cfg.steepDirectionThreshold * fg <= hc && e != g && d != g;
755  const bool haveSteepLine = cfg.steepDirectionThreshold * hc <= fg && e != c && b != c;
756 
757  if (haveShallowLine)
758  {
759  if (haveSteepLine)
760  Scaler::blendLineSteepAndShallow(px, out);
761  else
762  Scaler::blendLineShallow(px, out);
763  }
764  else
765  {
766  if (haveSteepLine)
767  Scaler::blendLineSteep(px, out);
768  else
769  Scaler::blendLineDiagonal(px,out);
770  }
771  }
772  else
773  Scaler::blendCorner(px, out);
774  }
775 
776 #undef a
777 #undef b
778 #undef c
779 #undef d
780 #undef e
781 #undef f
782 #undef g
783 #undef h
784 #undef i
785 }
786 
787 
788 template <class Scaler> //scaler policy: see "Scaler2x" reference implementation
789 void scaleImage(const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
790 {
791  yFirst = std::max(yFirst, 0);
792  yLast = std::min(yLast, srcHeight);
793  if (yFirst >= yLast || srcWidth <= 0)
794  return;
795 
796  const int trgWidth = srcWidth * Scaler::scale;
797 
798  //"use" space at the end of the image as temporary buffer for "on the fly preprocessing": we even could use larger area of
799  //"sizeof(uint32_t) * srcWidth * (yLast - yFirst)" bytes without risk of accidental overwriting before accessing
800  const int bufferSize = srcWidth;
801  unsigned char* preProcBuffer = reinterpret_cast<unsigned char*>(trg + yLast * Scaler::scale * trgWidth) - bufferSize;
802  std::fill(preProcBuffer, preProcBuffer + bufferSize, 0);
803  //static_assert(BLEND_NONE == 0, "");
804 
805  //initialize preprocessing buffer for first row: detect upper left and right corner blending
806  //this cannot be optimized for adjacent processing stripes; we must not allow for a memory race condition!
807  if (yFirst > 0)
808  {
809  const int y = yFirst - 1;
810 
811  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
812  const uint32_t* s_0 = src + srcWidth * y; //center line
813  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
814  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
815 
816  for (int x = 0; x < srcWidth; ++x)
817  {
818  const int x_m1 = std::max(x - 1, 0);
819  const int x_p1 = std::min(x + 1, srcWidth - 1);
820  const int x_p2 = std::min(x + 2, srcWidth - 1);
821 
822  Kernel_4x4 ker; //perf: initialization is negligable
823  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
824  ker.b = s_m1[x];
825  ker.c = s_m1[x_p1];
826  ker.d = s_m1[x_p2];
827 
828  ker.e = s_0[x_m1];
829  ker.f = s_0[x];
830  ker.g = s_0[x_p1];
831  ker.h = s_0[x_p2];
832 
833  ker.i = s_p1[x_m1];
834  ker.j = s_p1[x];
835  ker.k = s_p1[x_p1];
836  ker.l = s_p1[x_p2];
837 
838  ker.m = s_p2[x_m1];
839  ker.n = s_p2[x];
840  ker.o = s_p2[x_p1];
841  ker.p = s_p2[x_p2];
842 
843  const BlendResult res = preProcessCorners(ker, cfg);
844  /*
845  preprocessing blend result:
846  ---------
847  | F | G | //evalute corner between F, G, J, K
848  ----|---| //input pixel is at position F
849  | J | K |
850  ---------
851  */
852  setTopR(preProcBuffer[x], res.blend_j);
853 
854  if (x + 1 < srcWidth)
855  setTopL(preProcBuffer[x + 1], res.blend_k);
856  }
857  }
858  //------------------------------------------------------------------------------------
859 
860  for (int y = yFirst; y < yLast; ++y)
861  {
862  uint32_t* out = trg + Scaler::scale * y * trgWidth; //consider MT "striped" access
863 
864  const uint32_t* s_m1 = src + srcWidth * std::max(y - 1, 0);
865  const uint32_t* s_0 = src + srcWidth * y; //center line
866  const uint32_t* s_p1 = src + srcWidth * std::min(y + 1, srcHeight - 1);
867  const uint32_t* s_p2 = src + srcWidth * std::min(y + 2, srcHeight - 1);
868 
869  unsigned char blend_xy1 = 0; //corner blending for current (x, y + 1) position
870 
871  for (int x = 0; x < srcWidth; ++x, out += Scaler::scale)
872  {
873 #ifndef NDEBUG
874  breakIntoDebugger = debugPixelX == x && debugPixelY == y;
875 #endif
876  //all those bounds checks have only insignificant impact on performance!
877  const int x_m1 = std::max(x - 1, 0); //perf: prefer array indexing to additional pointers!
878  const int x_p1 = std::min(x + 1, srcWidth - 1);
879  const int x_p2 = std::min(x + 2, srcWidth - 1);
880 
881  //evaluate the four corners on bottom-right of current pixel
882  unsigned char blend_xy = 0; //for current (x, y) position
883  {
884  Kernel_4x4 ker; //perf: initialization is negligable
885  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
886  ker.b = s_m1[x];
887  ker.c = s_m1[x_p1];
888  ker.d = s_m1[x_p2];
889 
890  ker.e = s_0[x_m1];
891  ker.f = s_0[x];
892  ker.g = s_0[x_p1];
893  ker.h = s_0[x_p2];
894 
895  ker.i = s_p1[x_m1];
896  ker.j = s_p1[x];
897  ker.k = s_p1[x_p1];
898  ker.l = s_p1[x_p2];
899 
900  ker.m = s_p2[x_m1];
901  ker.n = s_p2[x];
902  ker.o = s_p2[x_p1];
903  ker.p = s_p2[x_p2];
904 
905  const BlendResult res = preProcessCorners(ker, cfg);
906  /*
907  preprocessing blend result:
908  ---------
909  | F | G | //evalute corner between F, G, J, K
910  ----|---| //current input pixel is at position F
911  | J | K |
912  ---------
913  */
914  blend_xy = preProcBuffer[x];
915  setBottomR(blend_xy, res.blend_f); //all four corners of (x, y) have been determined at this point due to processing sequence!
916 
917  setTopR(blend_xy1, res.blend_j); //set 2nd known corner for (x, y + 1)
918  preProcBuffer[x] = blend_xy1; //store on current buffer position for use on next row
919 
920  blend_xy1 = 0;
921  setTopL(blend_xy1, res.blend_k); //set 1st known corner for (x + 1, y + 1) and buffer for use on next column
922 
923  if (x + 1 < srcWidth) //set 3rd known corner for (x + 1, y)
924  setBottomL(preProcBuffer[x + 1], res.blend_g);
925  }
926 
927  //fill block of size scale * scale with the given color
928  fillBlock(out, trgWidth * sizeof(uint32_t), s_0[x], Scaler::scale); //place *after* preprocessing step, to not overwrite the results while processing the the last pixel!
929 
930  //blend four corners of current pixel
931  if (blendingNeeded(blend_xy)) //good 20% perf-improvement
932  {
933  Kernel_3x3 ker; //perf: initialization is negligable
934 
935  ker.a = s_m1[x_m1]; //read sequentially from memory as far as possible
936  ker.b = s_m1[x];
937  ker.c = s_m1[x_p1];
938 
939  ker.d = s_0[x_m1];
940  ker.e = s_0[x];
941  ker.f = s_0[x_p1];
942 
943  ker.g = s_p1[x_m1];
944  ker.h = s_p1[x];
945  ker.i = s_p1[x_p1];
946 
947  scalePixel<Scaler, ROT_0 >(ker, out, trgWidth, blend_xy, cfg);
948  scalePixel<Scaler, ROT_90 >(ker, out, trgWidth, blend_xy, cfg);
949  scalePixel<Scaler, ROT_180>(ker, out, trgWidth, blend_xy, cfg);
950  scalePixel<Scaler, ROT_270>(ker, out, trgWidth, blend_xy, cfg);
951  }
952  }
953  }
954 }
955 
956 
957 struct Scaler2x
958 {
959  static const int scale = 2;
960 
961  template <class OutputMatrix>
962  static void blendLineShallow(uint32_t col, OutputMatrix& out)
963  {
964  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
965  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
966  }
967 
968  template <class OutputMatrix>
969  static void blendLineSteep(uint32_t col, OutputMatrix& out)
970  {
971  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
972  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
973  }
974 
975  template <class OutputMatrix>
976  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
977  {
978  alphaBlend<1, 4>(out.template ref<1, 0>(), col);
979  alphaBlend<1, 4>(out.template ref<0, 1>(), col);
980  alphaBlend<5, 6>(out.template ref<1, 1>(), col); //[!] fixes 7/8 used in xBR
981  }
982 
983  template <class OutputMatrix>
984  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
985  {
986  alphaBlend<1, 2>(out.template ref<1, 1>(), col);
987  }
988 
989  template <class OutputMatrix>
990  static void blendCorner(uint32_t col, OutputMatrix& out)
991  {
992  //model a round corner
993  alphaBlend<21, 100>(out.template ref<1, 1>(), col); //exact: 1 - pi/4 = 0.2146018366
994  }
995 };
996 
997 
998 struct Scaler3x
999 {
1000  static const int scale = 3;
1001 
1002  template <class OutputMatrix>
1003  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1004  {
1005  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1006  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1007 
1008  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1009  out.template ref<scale - 1, 2>() = col;
1010  }
1011 
1012  template <class OutputMatrix>
1013  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1014  {
1015  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1016  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1017 
1018  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1019  out.template ref<2, scale - 1>() = col;
1020  }
1021 
1022  template <class OutputMatrix>
1023  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1024  {
1025  alphaBlend<1, 4>(out.template ref<2, 0>(), col);
1026  alphaBlend<1, 4>(out.template ref<0, 2>(), col);
1027  alphaBlend<3, 4>(out.template ref<2, 1>(), col);
1028  alphaBlend<3, 4>(out.template ref<1, 2>(), col);
1029  out.template ref<2, 2>() = col;
1030  }
1031 
1032  template <class OutputMatrix>
1033  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1034  {
1035  alphaBlend<1, 8>(out.template ref<1, 2>(), col);
1036  alphaBlend<1, 8>(out.template ref<2, 1>(), col);
1037  alphaBlend<7, 8>(out.template ref<2, 2>(), col);
1038  }
1039 
1040  template <class OutputMatrix>
1041  static void blendCorner(uint32_t col, OutputMatrix& out)
1042  {
1043  //model a round corner
1044  alphaBlend<45, 100>(out.template ref<2, 2>(), col); //exact: 0.4545939598
1045  //alphaBlend<14, 1000>(out.template ref<2, 1>(), col); //0.01413008627 -> negligable
1046  //alphaBlend<14, 1000>(out.template ref<1, 2>(), col); //0.01413008627
1047  }
1048 };
1049 
1050 
1051 struct Scaler4x
1052 {
1053  static const int scale = 4;
1054 
1055  template <class OutputMatrix>
1056  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1057  {
1058  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1059  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1060 
1061  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1062  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1063 
1064  out.template ref<scale - 1, 2>() = col;
1065  out.template ref<scale - 1, 3>() = col;
1066  }
1067 
1068  template <class OutputMatrix>
1069  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1070  {
1071  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1072  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1073 
1074  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1075  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1076 
1077  out.template ref<2, scale - 1>() = col;
1078  out.template ref<3, scale - 1>() = col;
1079  }
1080 
1081  template <class OutputMatrix>
1082  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1083  {
1084  alphaBlend<3, 4>(out.template ref<3, 1>(), col);
1085  alphaBlend<3, 4>(out.template ref<1, 3>(), col);
1086  alphaBlend<1, 4>(out.template ref<3, 0>(), col);
1087  alphaBlend<1, 4>(out.template ref<0, 3>(), col);
1088  alphaBlend<1, 3>(out.template ref<2, 2>(), col); //[!] fixes 1/4 used in xBR
1089  out.template ref<3, 3>() = out.template ref<3, 2>() = out.template ref<2, 3>() = col;
1090  }
1091 
1092  template <class OutputMatrix>
1093  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1094  {
1095  alphaBlend<1, 2>(out.template ref<scale - 1, scale / 2 >(), col);
1096  alphaBlend<1, 2>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1097  out.template ref<scale - 1, scale - 1>() = col;
1098  }
1099 
1100  template <class OutputMatrix>
1101  static void blendCorner(uint32_t col, OutputMatrix& out)
1102  {
1103  //model a round corner
1104  alphaBlend<68, 100>(out.template ref<3, 3>(), col); //exact: 0.6848532563
1105  alphaBlend< 9, 100>(out.template ref<3, 2>(), col); //0.08677704501
1106  alphaBlend< 9, 100>(out.template ref<2, 3>(), col); //0.08677704501
1107  }
1108 };
1109 
1110 
1111 struct Scaler5x
1112 {
1113  static const int scale = 5;
1114 
1115  template <class OutputMatrix>
1116  static void blendLineShallow(uint32_t col, OutputMatrix& out)
1117  {
1118  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1119  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1120  alphaBlend<1, 4>(out.template ref<scale - 3, 4>(), col);
1121 
1122  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1123  alphaBlend<3, 4>(out.template ref<scale - 2, 3>(), col);
1124 
1125  out.template ref<scale - 1, 2>() = col;
1126  out.template ref<scale - 1, 3>() = col;
1127  out.template ref<scale - 1, 4>() = col;
1128  out.template ref<scale - 2, 4>() = col;
1129  }
1130 
1131  template <class OutputMatrix>
1132  static void blendLineSteep(uint32_t col, OutputMatrix& out)
1133  {
1134  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1135  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1136  alphaBlend<1, 4>(out.template ref<4, scale - 3>(), col);
1137 
1138  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1139  alphaBlend<3, 4>(out.template ref<3, scale - 2>(), col);
1140 
1141  out.template ref<2, scale - 1>() = col;
1142  out.template ref<3, scale - 1>() = col;
1143  out.template ref<4, scale - 1>() = col;
1144  out.template ref<4, scale - 2>() = col;
1145  }
1146 
1147  template <class OutputMatrix>
1148  static void blendLineSteepAndShallow(uint32_t col, OutputMatrix& out)
1149  {
1150  alphaBlend<1, 4>(out.template ref<0, scale - 1>(), col);
1151  alphaBlend<1, 4>(out.template ref<2, scale - 2>(), col);
1152  alphaBlend<3, 4>(out.template ref<1, scale - 1>(), col);
1153 
1154  alphaBlend<1, 4>(out.template ref<scale - 1, 0>(), col);
1155  alphaBlend<1, 4>(out.template ref<scale - 2, 2>(), col);
1156  alphaBlend<3, 4>(out.template ref<scale - 1, 1>(), col);
1157 
1158  out.template ref<2, scale - 1>() = col;
1159  out.template ref<3, scale - 1>() = col;
1160 
1161  out.template ref<scale - 1, 2>() = col;
1162  out.template ref<scale - 1, 3>() = col;
1163 
1164  out.template ref<4, scale - 1>() = col;
1165 
1166  alphaBlend<2, 3>(out.template ref<3, 3>(), col);
1167  }
1168 
1169  template <class OutputMatrix>
1170  static void blendLineDiagonal(uint32_t col, OutputMatrix& out)
1171  {
1172  alphaBlend<1, 8>(out.template ref<scale - 1, scale / 2 >(), col);
1173  alphaBlend<1, 8>(out.template ref<scale - 2, scale / 2 + 1>(), col);
1174  alphaBlend<1, 8>(out.template ref<scale - 3, scale / 2 + 2>(), col);
1175 
1176  alphaBlend<7, 8>(out.template ref<4, 3>(), col);
1177  alphaBlend<7, 8>(out.template ref<3, 4>(), col);
1178 
1179  out.template ref<4, 4>() = col;
1180  }
1181 
1182  template <class OutputMatrix>
1183  static void blendCorner(uint32_t col, OutputMatrix& out)
1184  {
1185  //model a round corner
1186  alphaBlend<86, 100>(out.template ref<4, 4>(), col); //exact: 0.8631434088
1187  alphaBlend<23, 100>(out.template ref<4, 3>(), col); //0.2306749731
1188  alphaBlend<23, 100>(out.template ref<3, 4>(), col); //0.2306749731
1189  //alphaBlend<8, 1000>(out.template ref<4, 2>(), col); //0.008384061834 -> negligable
1190  //alphaBlend<8, 1000>(out.template ref<2, 4>(), col); //0.008384061834
1191  }
1192 };
1193 }
1194 
1195 
1196 void xbrz::scale(size_t factor, const uint32_t* src, uint32_t* trg, int srcWidth, int srcHeight, const xbrz::ScalerCfg& cfg, int yFirst, int yLast)
1197 {
1198  switch (factor)
1199  {
1200  case 2:
1201  return scaleImage<Scaler2x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1202  case 3:
1203  return scaleImage<Scaler3x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1204  case 4:
1205  return scaleImage<Scaler4x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1206  case 5:
1207  return scaleImage<Scaler5x>(src, trg, srcWidth, srcHeight, cfg, yFirst, yLast);
1208  }
1209  assert(false);
1210 }
1211 
1212 
1213 bool xbrz::equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
1214 {
1215  return colorDist(col1, col2, luminanceWeight) < equalColorTolerance;
1216 }
1217 
1218 
1219 void xbrz::nearestNeighborScale(const uint32_t* src, int srcWidth, int srcHeight, int srcPitch,
1220  uint32_t* trg, int trgWidth, int trgHeight, int trgPitch,
1221  SliceType st, int yFirst, int yLast)
1222 {
1223  if (srcPitch < srcWidth * static_cast<int>(sizeof(uint32_t)) ||
1224  trgPitch < trgWidth * static_cast<int>(sizeof(uint32_t)))
1225  {
1226  assert(false);
1227  return;
1228  }
1229 
1230  switch (st)
1231  {
1232  case NN_SCALE_SLICE_SOURCE:
1233  //nearest-neighbor (going over source image - fast for upscaling, since source is read only once
1234  yFirst = std::max(yFirst, 0);
1235  yLast = std::min(yLast, srcHeight);
1236  if (yFirst >= yLast || trgWidth <= 0 || trgHeight <= 0) return;
1237 
1238  for (int y = yFirst; y < yLast; ++y)
1239  {
1240  //mathematically: ySrc = floor(srcHeight * yTrg / trgHeight)
1241  // => search for integers in: [ySrc, ySrc + 1) * trgHeight / srcHeight
1242 
1243  //keep within for loop to support MT input slices!
1244  const int yTrg_first = ( y * trgHeight + srcHeight - 1) / srcHeight; //=ceil(y * trgHeight / srcHeight)
1245  const int yTrg_last = ((y + 1) * trgHeight + srcHeight - 1) / srcHeight; //=ceil(((y + 1) * trgHeight) / srcHeight)
1246  const int blockHeight = yTrg_last - yTrg_first;
1247 
1248  if (blockHeight > 0)
1249  {
1250  const uint32_t* srcLine = byteAdvance(src, y * srcPitch);
1251  uint32_t* trgLine = byteAdvance(trg, yTrg_first * trgPitch);
1252  int xTrg_first = 0;
1253 
1254  for (int x = 0; x < srcWidth; ++x)
1255  {
1256  int xTrg_last = ((x + 1) * trgWidth + srcWidth - 1) / srcWidth;
1257  const int blockWidth = xTrg_last - xTrg_first;
1258  if (blockWidth > 0)
1259  {
1260  xTrg_first = xTrg_last;
1261  fillBlock(trgLine, trgPitch, srcLine[x], blockWidth, blockHeight);
1262  trgLine += blockWidth;
1263  }
1264  }
1265  }
1266  }
1267  break;
1268 
1269  case NN_SCALE_SLICE_TARGET:
1270  //nearest-neighbor (going over target image - slow for upscaling, since source is read multiple times missing out on cache! Fast for similar image sizes!)
1271  yFirst = std::max(yFirst, 0);
1272  yLast = std::min(yLast, trgHeight);
1273  if (yFirst >= yLast || srcHeight <= 0 || srcWidth <= 0) return;
1274 
1275  for (int y = yFirst; y < yLast; ++y)
1276  {
1277  uint32_t* trgLine = byteAdvance(trg, y * trgPitch);
1278  const int ySrc = srcHeight * y / trgHeight;
1279  const uint32_t* srcLine = byteAdvance(src, ySrc * srcPitch);
1280  for (int x = 0; x < trgWidth; ++x)
1281  {
1282  const int xSrc = srcWidth * x / trgWidth;
1283  trgLine[x] = srcLine[xSrc];
1284  }
1285  }
1286  break;
1287  }
1288 }
GLdouble GLdouble z
Definition: glew.h:1527
GLvoid **typedef void(GLAPIENTRY *PFNGLGETVERTEXATTRIBDVPROC)(GLuint
Definition: glew.h:1806
GLenum GLenum GLenum GLenum GLenum scale
Definition: glew.h:10669
double luminanceWeight_
Definition: config.hpp:54
const GLfloat * c
Definition: glew.h:12741
double dominantDirectionThreshold
Definition: config.hpp:56
boost::uint32_t uint32_t
Definition: xbrz.hpp:45
#define f
Definition: glew.h:5903
GLuint const GLfloat * val
Definition: glew.h:2614
#define h
GLboolean GLboolean g
Definition: glew.h:7319
GLint GLint GLint GLint GLint GLint y
Definition: glew.h:1220
SliceType
Definition: xbrz.hpp:86
GLenum src
Definition: glew.h:2392
#define d
#define FORCE_INLINE
Definition: xbrz.cpp:191
GLdouble l
Definition: glew.h:6966
GLdouble GLdouble GLdouble b
Definition: glew.h:6966
GLuint64EXT * result
Definition: glew.h:10727
void scale(size_t factor, const uint32_t *src, uint32_t *trg, int srcWidth, int srcHeight, const ScalerCfg &cfg=ScalerCfg(), int yFirst=0, int yLast=std::numeric_limits< int >::max())
Definition: xbrz.cpp:1196
#define a
Definition: glew.h:12732
int int int int int xSrc
Definition: wglew.h:173
const GLdouble * v
Definition: glew.h:1359
GLsizei const GLfloat * value
Definition: glew.h:1817
GLenum GLenum dst
Definition: glew.h:2392
GLboolean GLboolean GLboolean GLboolean a
Definition: glew.h:7319
GLclampf GLclampf GLclampf alpha
Definition: glew.h:1488
GLfloat GLfloat p
Definition: glew.h:12766
bool equalColor(uint32_t col1, uint32_t col2, double luminanceWeight, double equalColorTolerance)
Definition: xbrz.cpp:1213
GLuint res
Definition: glew.h:9258
#define DEF_GETTER(x)
Definition: xbrz.cpp:635
double steepDirectionThreshold
Definition: config.hpp:57
GLfloat GLfloat GLfloat GLfloat h
Definition: glew.h:5910
#define i
RotationDegree
Definition: xbrz.cpp:195
GLint GLint GLint GLint GLint x
Definition: glew.h:1220
GLdouble GLdouble GLdouble r
Definition: glew.h:1374
GLclampd n
Definition: glew.h:5903
GLenum GLint ref
Definition: glew.h:1813
const GLdouble * m
Definition: glew.h:6968
#define g
Definition: glew.h:12730
int int int int int int ySrc
Definition: wglew.h:173
#define c
Definition: glew.h:12743
#define e
#define b
Definition: glew.h:6976
void nearestNeighborScale(const uint32_t *src, int srcWidth, int srcHeight, uint32_t *trg, int trgWidth, int trgHeight)
Definition: xbrz.hpp:104
GLenum target
Definition: glew.h:5190
BlendType
Definition: xbrz.cpp:546
double equalColorTolerance_
Definition: config.hpp:55
GLclampf f
Definition: glew.h:3024