wavelet_utils.cpp

00001 /* ***** BEGIN LICENSE BLOCK *****
00002 *
00003 * $Id: wavelet_utils.cpp,v 1.3 2005/01/30 05:11:40 gabest Exp $ $Name:  $
00004 *
00005 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
00006 *
00007 * The contents of this file are subject to the Mozilla Public License
00008 * Version 1.1 (the "License");  you may not use this file except in compliance
00009 * with the License. You may obtain a copy of the License at
00010 * http://www.mozilla.org/MPL/
00011 *
00012 * Software distributed under the License is distributed on an "AS IS" basis,
00013 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License for
00014 * the specific language governing rights and limitations under the License.
00015 *
00016 * The Original Code is BBC Research and Development code.
00017 *
00018 * The Initial Developer of the Original Code is the British Broadcasting
00019 * Corporation.
00020 * Portions created by the Initial Developer are Copyright (C) 2004.
00021 * All Rights Reserved.
00022 *
00023 * Contributor(s): Thomas Davies (Original Author), Scott R Ladd
00024 *
00025 * Alternatively, the contents of this file may be used under the terms of
00026 * the GNU General Public License Version 2 (the "GPL"), or the GNU Lesser
00027 * Public License Version 2.1 (the "LGPL"), in which case the provisions of
00028 * the GPL or the LGPL are applicable instead of those above. If you wish to
00029 * allow use of your version of this file only under the terms of the either
00030 * the GPL or LGPL and not to allow others to use your version of this file
00031 * under the MPL, indicate your decision by deleting the provisions above
00032 * and replace them with the notice and other provisions required by the GPL
00033 * or LGPL. If you do not delete the provisions above, a recipient may use
00034 * your version of this file under the terms of any one of the MPL, the GPL
00035 * or the LGPL.
00036 * ***** END LICENSE BLOCK ***** */
00037 
00038 
00039 #include <libdirac_common/wavelet_utils.h>
00040 #include <libdirac_common/common.h>
00041 using namespace dirac;
00042 
00043 #include <cstdlib>
00044 
00045 // Default constructor
00046 Subband::Subband()
00047 {
00048     // this space intentionally left blank
00049 }
00050 
00051 // Constructor
00052 Subband::Subband(int xpos,int ypos, int xlen, int ylen):
00053     xps(xpos),
00054     yps(ypos),
00055     xln(xlen),
00056     yln(ylen),
00057     wgt(1),
00058     qfac(8)
00059 {
00060     // this space intentionally left blank
00061 }
00062 
00063 // Constructor
00064 Subband::Subband(int xpos,int ypos, int xlen, int ylen, int d)
00065   : xps(xpos),
00066     yps(ypos),
00067     xln(xlen), 
00068     yln(ylen),
00069     wgt(1),
00070     dpth(d),
00071     qfac(8)
00072 {
00073     // this space intentionally left blank
00074 }
00075 
00077 Subband::~Subband()
00078 {
00079     // this space intentionally left blank
00080 }
00081 
00082 //subband list methods
00083 
00084 void SubbandList::Init(const int depth,const int xlen,const int ylen)
00085 {
00086     int xl=xlen; 
00087     int yl=ylen; 
00088     
00089     Clear(); 
00090     Subband* tmp;
00091      
00092     for (int level = 1; level <= depth; ++level)
00093     {
00094         xl/=2; 
00095         yl/=2; 
00096         
00097         tmp=new Subband(xl , 0 , xl , yl , level); 
00098         AddBand( *tmp ); 
00099         delete tmp; 
00100         
00101         tmp=new Subband( 0 , yl , xl , yl , level); 
00102         AddBand( *tmp ); 
00103         delete tmp; 
00104         
00105         tmp=new Subband( xl , yl , xl , yl , level); 
00106         AddBand( *tmp ); 
00107         delete tmp; 
00108         
00109         if (level == depth)
00110         {
00111             tmp=new Subband( 0 , 0 , xl , yl , level); 
00112             AddBand( *tmp ); 
00113             delete tmp; 
00114         }        
00115     }
00116     //now set the parent-child relationships
00117     int len = bands.size(); 
00118     (*this)(len).SetParent(0);         
00119     (*this)(len).AddChild(len-3); 
00120     (*this)(len-3).SetParent(len); 
00121     (*this)(len).AddChild(len-2); 
00122     (*this)(len-2).SetParent(len); 
00123     (*this)(len).AddChild(len-1); 
00124     (*this)(len-1).SetParent(len); 
00125 
00126     for (int level = 1; level < depth; ++level)
00127     {
00128          //do parent-child relationship for other bands
00129         (*this)(3*level + 1).AddChild( 3*(level-1) + 1); 
00130         (*this)(3*(level-1) + 1).SetParent(3*level + 1); 
00131 
00132         (*this)(3*level + 2).AddChild(3*(level-1) + 2); 
00133         (*this)(3*(level-1) + 2).SetParent(3*level + 2); 
00134 
00135         (*this)(3*level + 3).AddChild(3*(level-1) + 3); 
00136         (*this)(3*(level-1) + 3).SetParent(3*level + 3); 
00137     }// level
00138 }
00139 
00140 //wavelet transform methods
00142 
00143 //public methods
00144 
00145 WaveletTransform::WaveletTransform(int d, WltFilter f)
00146   : depth(d),
00147     filt_sort(f)
00148 {
00149     // this space intentionally left blank
00150 }
00151 
00153 WaveletTransform::~WaveletTransform()
00154 {
00155     // this space intentionally left blank
00156 }
00157 
00158 void WaveletTransform::Transform(const Direction d, PicArray& pic_data)
00159 {
00160     int xl,yl; 
00161 
00162     if (d == FORWARD)
00163     {
00164         //do work
00165         xl=pic_data.LengthX(); 
00166         yl=pic_data.LengthY(); 
00167         
00168         for (int l = 1; l <= depth; ++l)
00169         {
00170             VHSplit(0,0,xl,yl,pic_data); 
00171             xl /= 2; 
00172             yl /= 2; 
00173         }
00174 
00175         band_list.Init( depth , pic_data.LengthX() , pic_data.LengthY() ); 
00176     }
00177     else
00178     {
00179         //do work
00180         xl = pic_data.LengthX()/(1<<(depth-1)); 
00181         yl = pic_data.LengthY()/(1<<(depth-1)); 
00182         
00183         for (int l = 1; l <= depth; ++l)
00184         {
00185             VHSynth(0,0,xl,yl,pic_data); 
00186             xl *= 2; 
00187             yl *= 2; 
00188         }
00189         
00190         //band list now inaccurate, so clear        
00191         band_list.Clear();     
00192     }
00193 }
00194 
00195 //private functions
00197 
00198 void WaveletTransform::VHSplit(const int xp, const int yp, const int xl, const int yl, PicArray& pic_data)
00199 {
00200 
00201     //version based on integer-like types
00202     //using edge-extension rather than reflection
00203 
00204     OneDArray<ValueType *> tmp_data(yl); 
00205     const int xl2 = xl/2; 
00206     const int yl2 = yl/2; 
00207     const int xend=xp+xl;
00208     const int yend=yp+yl;
00209 
00210     ValueType* line_data; 
00211 
00212     // Positional variables
00213     int i,j,k,r,s; 
00214   
00215     // Objects to do lifting stages 
00216     // (in revese order and type from synthesis)
00217     const PredictStep< 6497 > predictA;
00218     const PredictStep< 217 > predictB;
00219     const UpdateStep< 3616 > updateA;
00220     const UpdateStep< 1817 > updateB;
00221 
00222      //first do horizontal 
00223 
00224     for (j = yp;  j < yend; ++j)
00225     {
00226         // First lifting stage
00227         line_data = pic_data[j];                 
00228 
00229         predictA.Filter( line_data[xp+1] , line_data[xp+2] , line_data[xp] );
00230         predictB.Filter( line_data[xp] , line_data[xp+1] , line_data[xp+1] );
00231 
00232         for (i = xp+2, k = xp+3; i < xend-2; i+=2, k+=2)
00233         {
00234             predictA.Filter( line_data[k] , line_data[i+2] , line_data[i] );
00235             predictB.Filter( line_data[i] , line_data[k-2] , line_data[k] );
00236         }// i
00237         
00238         predictA.Filter( line_data[xend-1] , line_data[xend-2] , line_data[xend-2] );
00239         predictB.Filter( line_data[xend-2] , line_data[xend-3] , line_data[xend-1] );
00240 
00241 
00242          //second lifting stage 
00243         
00244         updateA.Filter( line_data[xp+1] , line_data[xp+2] , line_data[xp] );
00245         updateB.Filter( line_data[xp] , line_data[xp+1] , line_data[xp+1] );
00246 
00247         for (i = xp+2, k = xp+3;  i < xend-2; i+=2 , k+=2)
00248         { 
00249             updateA.Filter( line_data[k] , line_data[i+2] , line_data[i] );
00250             updateB.Filter( line_data[i] , line_data[k-2] , line_data[k] );
00251         }// i
00252 
00253         updateA.Filter( line_data[xend-1] , line_data[xend-2] , line_data[xend-2] );
00254         updateB.Filter( line_data[xend-2] , line_data[xend-3] , line_data[xend-1] );
00255 
00256     }// j
00257 
00258     // next do vertical
00259 
00260     // First lifting stage
00261 
00262     // top edge - j=xp
00263     for ( i = xp ; i<xend ; ++ i)
00264     {
00265         predictA.Filter( pic_data[yp+1][i] , pic_data[yp+2][i] , pic_data[yp][i] );
00266         predictB.Filter( pic_data[yp][i] , pic_data[yp+1][i] , pic_data[yp+1][i] );
00267     }// i
00268 
00269     // middle bit
00270     for ( j = yp+2, k = yp+3 ; j<yend-2 ; j+=2 , k+=2)
00271     {
00272         for ( i = xp ; i<xend ; ++ i)
00273         {
00274             predictA.Filter( pic_data[k][i] , pic_data[j+2][i] , pic_data[j][i] );
00275             predictB.Filter( pic_data[j][i] , pic_data[k-2][i] , pic_data[k][i] );
00276         }// i
00277     }// j
00278     // bottom edge
00279     for ( i = xp ; i<xend ; ++ i)
00280     {
00281         predictA.Filter( pic_data[yend-1][i] , pic_data[yend-2][i] , pic_data[yend-2][i] );
00282         predictB.Filter( pic_data[yend-2][i] , pic_data[yend-3][i] , pic_data[yend-1][i] );
00283     }// i
00284 
00285     // Second lifting stage
00286 
00287     // top edge - j=xp
00288     for ( i = xp ; i<xend ; ++ i)
00289     {
00290         updateA.Filter( pic_data[yp+1][i] , pic_data[yp+2][i] , pic_data[yp][i] );
00291         updateB.Filter( pic_data[yp][i] , pic_data[yp+1][i] , pic_data[yp+1][i] );
00292     }// i
00293 
00294     // middle bit
00295     for ( j = yp+2, k = yp+3 ; j<yend-2 ; j+=2 , k+=2)
00296     {
00297         for ( i = xp ; i<xend ; ++ i)
00298         {
00299             updateA.Filter( pic_data[k][i] , pic_data[j+2][i] , pic_data[j][i] );
00300             updateB.Filter( pic_data[j][i] , pic_data[k-2][i] , pic_data[k][i] );
00301         }// i
00302     }// j
00303     // bottom edge
00304     for ( i = xp ; i<xend ; ++ i)
00305     {
00306         updateA.Filter( pic_data[yend-1][i] , pic_data[yend-2][i] , pic_data[yend-2][i] );
00307         updateB.Filter( pic_data[yend-2][i] , pic_data[yend-3][i] , pic_data[yend-1][i] );
00308     }// i
00309 
00310     // Lastly, have to reorder so that subbands are no longer interleaved
00311 
00312     ValueType** temp_data = new ValueType*[yl];
00313     for ( j = 0 ; j< yl ; ++ j)
00314         temp_data[j] = new ValueType[xl];
00315 
00316     // Make a temporary copy of the subband
00317     for ( j = yp; j<yend ; j++ )
00318         memcpy( temp_data[j-yp] , pic_data[j]+xp , xl * sizeof( ValueType ) );
00319 
00320     // Re-order to de-interleave
00321     for ( j = yp, s=0; j<yp+yl2 ; j++, s+=2)
00322     {
00323         for ( i = xp , r=0 ; i<xp+xl2 ; i++ , r += 2)
00324             pic_data[j][i] = temp_data[s][r];
00325         for ( i = xp+xl2, r=1; i<xend ; i++ , r += 2)
00326             pic_data[j][i] = temp_data[s][r];
00327     }// j 
00328 
00329     for ( j = yp+yl2, s=1 ; j<yend ; j++ , s += 2)
00330     {
00331         for ( i = xp , r=0 ; i<xp+xl2 ; i++ , r += 2)
00332             pic_data[j][i] = temp_data[s][r];
00333         for ( i = xp+xl2, r=1; i<xend ; i++ , r += 2)
00334             pic_data[j][i] = temp_data[s][r];
00335     }// j 
00336 
00337 
00338     for ( j = 0 ; j< yl ; ++ j)
00339         delete[] temp_data[j];
00340     delete[] temp_data;
00341 
00342 }
00343 
00344 void WaveletTransform::VHSynth(const int xp, const int yp, const int xl, const int yl, PicArray& pic_data)
00345 {
00346     int i,j,k,r,s;
00347 
00348     const int xend( xp+xl );
00349     const int yend( yp+yl );
00350     const int xl2( xl/2 );
00351     const int yl2( yl/2 );
00352 
00353     const PredictStep< 1817 > predictB;
00354     const PredictStep< 3616 > predictA;
00355     const UpdateStep< 217 > updateB;
00356     const UpdateStep< 6497 > updateA;
00357 
00358     ValueType* line_data;
00359 
00360     // Firstly reorder to interleave subbands, so that subsequent calculations can be in-place
00361 
00362     ValueType** temp_data = new ValueType*[yl];
00363     for ( j = 0 ; j< yl ; ++ j)
00364         temp_data[j] = new ValueType[xl];
00365 
00366     // Make a temporary copy of the subband
00367     for ( j = yp; j<yend ; j++ )
00368         memcpy( temp_data[j-yp] , pic_data[j]+xp , xl * sizeof( ValueType ) );
00369 
00370     // Re-order to interleave
00371     for ( j = 0, s=yp; j<yl2 ; j++, s+=2)
00372     {
00373         for ( i = 0 , r=xp ; i<xl2 ; i++ , r += 2)
00374             pic_data[s][r] = temp_data[j][i];
00375         for ( i = xl2, r=xp+1; i<xl ; i++ , r += 2)
00376             pic_data[s][r] = temp_data[j][i];
00377     }// j 
00378 
00379     for ( j = yl2, s=yp+1 ; j<yl ; j++ , s += 2)
00380     {
00381         for ( i = 0 , r=xp ; i<xl2 ; i++ , r += 2)
00382             pic_data[s][r] = temp_data[j][i];
00383         for ( i = xl2, r=xp+1; i<xl ; i++ , r += 2)
00384             pic_data[s][r] = temp_data[j][i];
00385     }// j 
00386 
00387     for ( j = 0 ; j< yl ; ++ j)
00388         delete[] temp_data[j];
00389     delete[] temp_data;
00390 
00391     // Next, do the vertical synthesis
00392     // First lifting stage
00393 
00394     // Begin with the bottom edge
00395     for ( i = xend-1 ; i>=xp ; --i)
00396     {
00397         predictB.Filter( pic_data[yend-2][i] , pic_data[yend-3][i] , pic_data[yend-1][i] );
00398         predictA.Filter( pic_data[yend-1][i] , pic_data[yend-2][i] , pic_data[yend-2][i] );
00399     }// i
00400     // Next, do the middle bit
00401     for ( j = yend-4, k = yend-3 ; j>yp ; j-=2 , k-=2)
00402     {
00403         for ( i = xend-1 ; i>=xp ; --i)
00404         {
00405             predictB.Filter( pic_data[j][i] , pic_data[k-2][i] , pic_data[k][i] );
00406             predictA.Filter( pic_data[k][i] , pic_data[j+2][i] , pic_data[j][i] );
00407         }// i
00408     }// j
00409     // Then do the top edge
00410     for ( i = xend-1 ; i>=xp ; --i)
00411     {
00412         predictB.Filter( pic_data[yp][i] , pic_data[yp+1][i] , pic_data[yp+1][i] );
00413         predictA.Filter( pic_data[yp+1][i] , pic_data[yp+2][i] , pic_data[yp][i] );
00414     }// i
00415 
00416     // Second lifting stage
00417 
00418     // Begin with the bottom edge
00419     for ( i = xend-1 ; i>=xp ; --i)
00420     {
00421         updateB.Filter( pic_data[yend-2][i] , pic_data[yend-3][i] , pic_data[yend-1][i] );
00422         updateA.Filter( pic_data[yend-1][i] , pic_data[yend-2][i] , pic_data[yend-2][i] );
00423     }// i
00424     // Next, do the middle bit
00425     for ( j = yend-4, k = yend-3 ; j>yp ; j-=2 , k-=2)
00426     {
00427         for ( i = xend-1 ; i>=xp ; --i)
00428         {
00429             updateB.Filter( pic_data[j][i] , pic_data[k-2][i] , pic_data[k][i] );
00430             updateA.Filter( pic_data[k][i] , pic_data[j+2][i] , pic_data[j][i] );
00431         }// i
00432     }// j
00433     // Then do the top edge
00434     for ( i = xend-1 ; i>=xp ; --i)
00435     {
00436         updateB.Filter( pic_data[yp][i] , pic_data[yp+1][i] , pic_data[yp+1][i] );
00437         updateA.Filter( pic_data[yp+1][i] , pic_data[yp+2][i] , pic_data[yp][i] );
00438     }// i
00439 
00440 
00441     // Next do the horizontal synthesis
00442     for (j = yend-1;  j >= yp ; --j)
00443     {
00444         // First lifting stage 
00445         line_data = pic_data[j];
00446 
00447         predictB.Filter( line_data[xend-2] , line_data[xend-3] , line_data[xend-1] ); 
00448         predictA.Filter( line_data[xend-1] , line_data[xend-2] , line_data[xend-2] );
00449 
00450         for (i = xend-4, k = xend-3;  i > xp; i-=2 , k-=2)
00451         { 
00452             predictB.Filter( line_data[i] , line_data[k-2] , line_data[k] );
00453             predictA.Filter( line_data[k] , line_data[i+2] , line_data[i] );
00454         }// i
00455 
00456         predictB.Filter( line_data[xp] , line_data[xp+1] , line_data[xp+1] );
00457         predictA.Filter( line_data[xp+1] , line_data[xp+2] , line_data[xp] );
00458 
00459         // Second lifting stage
00460 
00461         updateB.Filter( line_data[xend-2] , line_data[xend-3] , line_data[xend-1] );
00462         updateA.Filter( line_data[xend-1] , line_data[xend-2] , line_data[xend-2] );
00463 
00464         for (i = xend-4, k = xend-3;  i > xp; i-=2 , k-=2)
00465         {
00466             updateB.Filter( line_data[i] , line_data[k-2] , line_data[k] );
00467             updateA.Filter( line_data[k] , line_data[i+2] , line_data[i] );
00468         }// i
00469 
00470         updateB.Filter( line_data[xp] , line_data[xp+1] , line_data[xp+1] );
00471         updateA.Filter( line_data[xp+1] , line_data[xp+2] , line_data[xp] );
00472 
00473     }
00474 
00475 }
00476 
00477 //perceptual weighting stuff
00479 
00480 // Returns a perceptual noise weighting based on extending CCIR 959 values
00481 // assuming a two-d isotropic response. Also has a fudge factor of 20% for chroma
00482 float WaveletTransform::PerceptualWeight( float xf , float yf , CompSort cs )
00483 {
00484     double freq_sqd( xf*xf + yf*yf );
00485 
00486     if ( cs != Y_COMP )
00487         freq_sqd *= 1.2;
00488 
00489     return 0.255 * std::pow( 1.0 + 0.2561*freq_sqd , 0.75) ;
00490 
00491 }
00492 
00493 void WaveletTransform::SetBandWeights (const float cpd, 
00494                                        const FrameSort& fsort,
00495                                        const ChromaFormat& cformat,
00496                                        const CompSort csort)
00497 {
00498     //NB - only designed for progressive to date    
00499 
00500     int xlen, ylen, xl, yl, xp, yp, depth;
00501     float xfreq, yfreq;
00502     float temp;
00503 
00504     // Compensate for chroma subsampling
00505 
00506     float chroma_xfac(1.0);
00507     float chroma_yfac(1.0);
00508 
00509     if( csort != Y_COMP)
00510     {
00511         if( cformat == format422)
00512         {
00513             chroma_xfac = 2.0;
00514             chroma_yfac = 1.0;
00515         }
00516         else if( cformat == format411 )
00517         {
00518             chroma_xfac = 4.0;
00519             chroma_yfac = 1.0;
00520         }
00521         else if( cformat == format420 )
00522         {
00523             chroma_xfac = 2.0;
00524             chroma_yfac = 2.0;
00525         }
00526 
00527     }
00528 
00529     xlen = 2 * band_list(1).Xl();
00530     ylen = 2 * band_list(1).Yl();
00531 
00532     if (cpd != 0.0)
00533     {
00534         for( int i = 1; i<=band_list.Length() ; i++ )
00535         {
00536             xp = band_list(i).Xp();
00537             yp = band_list(i).Yp();
00538             xl = band_list(i).Xl();
00539             yl = band_list(i).Yl();
00540 
00541 
00542             xfreq = cpd * ( float(xp) + (float(xl)/2.0) ) / float(xlen);
00543             yfreq = cpd * ( float(yp) + (float(yl)/2.0) ) / float(ylen);
00544 
00545             if ( fsort != I_frame )
00546             {
00547                 xfreq /= 8.0;
00548                 yfreq /= 8.0;
00549             }
00550 
00551 
00552             temp = PerceptualWeight( xfreq/chroma_xfac , yfreq/chroma_yfac , csort );
00553 
00554             band_list(i).SetWt(temp);
00555         }// i
00556 
00557         // Give more welly to DC in a completely unscientific manner ...
00558         // (should really relate this to the frame rate)
00559         band_list( band_list.Length() ).SetWt(band_list(13).Wt()/6.0);
00560 
00561         // Make sure dc is always the lowest weight
00562         float min_weight=band_list(band_list.Length()).Wt();
00563 
00564         for( int b=1 ; b<=band_list.Length()-1 ; b++ )
00565             min_weight = ((min_weight>band_list(b).Wt()) ? band_list(b).Wt() : min_weight);
00566 
00567         band_list( band_list.Length() ).SetWt( min_weight );
00568 
00569         // Now normalize weights so that white noise is always weighted the same
00570 
00571         // Overall factor to ensure that white noise ends up with the same RMS, whatever the weight
00572         double overall_factor=0.0;
00573         //fraction of the total number of samples belonging to each subband
00574         double subband_fraction;    
00575 
00576         for( int i=1 ; i<=band_list.Length() ; i++ )
00577         {
00578             subband_fraction = 1.0/((double) band_list(i).Scale() * band_list(i).Scale());
00579             overall_factor += subband_fraction/( band_list(i).Wt() * band_list(i).Wt() );
00580         }
00581         overall_factor = std::sqrt( overall_factor );
00582 
00583         //go through and normalise
00584 
00585         for( int i=band_list.Length() ; i>0 ; i-- )
00586             band_list(i).SetWt( band_list(i).Wt() * overall_factor );
00587     }
00588     else
00589     {//cpd=0 so set all weights to 1
00590 
00591         for( int i=1 ; i<=band_list.Length() ; i++ )
00592            band_list(i).SetWt( 1.0 );        
00593 
00594     }
00595 
00596     //Finally, adjust to compensate for the absence of scaling in the transform
00597     //Factor used to compensate:
00598     const double alpha(1.149658203);    
00599     for ( int i=1 ; i<=band_list.Length() ; ++i )
00600     {
00601         depth=band_list(i).Depth();
00602 
00603         if ( band_list(i).Xp() == 0 && band_list(i).Yp() == 0)
00604             temp=std::pow(alpha,2*depth);
00605         else if ( band_list(i).Xp() != 0 && band_list(i).Yp() != 0)
00606             temp=std::pow(alpha,2*(depth-2));
00607         else
00608             temp=std::pow(alpha,2*(depth-1));
00609 
00610         band_list(i).SetWt(band_list(i).Wt()/temp);
00611 
00612     }// i        
00613 
00614 }    

Generated on Tue Dec 13 14:47:15 2005 for guliverkli by  doxygen 1.4.5