Main Page | Namespace List | Class Hierarchy | Class List | Directories | File List | Namespace Members | Class Members | File Members

LibraryDictionary.cpp

Go to the documentation of this file.
00001 //
00002 // LibraryDictionary.cpp
00003 //
00004 // Copyright (c) Shareaza Development Team, 2002-2005.
00005 // This file is part of SHAREAZA (www.shareaza.com)
00006 //
00007 // Shareaza is free software; you can redistribute it
00008 // and/or modify it under the terms of the GNU General Public License
00009 // as published by the Free Software Foundation; either version 2 of
00010 // the License, or (at your option) any later version.
00011 //
00012 // Shareaza is distributed in the hope that it will be useful,
00013 // but WITHOUT ANY WARRANTY; without even the implied warranty of
00014 // MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00015 // GNU General Public License for more details.
00016 //
00017 // You should have received a copy of the GNU General Public License
00018 // along with Shareaza; if not, write to the Free Software
00019 // Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307  USA
00020 //
00021 
00022 #include "StdAfx.h"
00023 #include "Shareaza.h"
00024 #include "Settings.h"
00025 #include "Library.h"
00026 #include "LibraryMaps.h"
00027 #include "LibraryDictionary.h"
00028 #include "SharedFile.h"
00029 
00030 #include "QueryHashTable.h"
00031 #include "QuerySearch.h"
00032 
00033 #include "Schema.h"
00034 #include "XML.h"
00035 
00036 #include "SHA.h"
00037 #include "ED2K.h"
00038 #include "TigerTree.h"
00039 
00040 #include "UploadQueues.h"
00041 
00042 #ifdef _DEBUG
00043 #undef THIS_FILE
00044 static char THIS_FILE[]=__FILE__;
00045 #define new DEBUG_NEW
00046 #endif
00047 
// File-scope CLibraryDictionary object defined by this translation unit.
00048 CLibraryDictionary LibraryDictionary;
00049 
00050 
00052 // CLibraryDictionary construction
00053 
00054 CLibraryDictionary::CLibraryDictionary() : m_pWords( 64 )
00055 {
00056         m_pTable = NULL;
00057         m_bTable = FALSE;
00058         
00059         m_nSearchCookie = 1;
00060 }
00061 
00062 CLibraryDictionary::~CLibraryDictionary()
00063 {
00064         Clear();
00065         if ( m_pTable ) delete m_pTable;
00066 }
00067 
00069 // CLibraryDictionary add and remove
00070 
00071 void CLibraryDictionary::Add(CLibraryFile* pFile)
00072 {
00073         ProcessFile( pFile, TRUE );
00074         
00075         if ( ( pFile->m_bSHA1 || pFile->m_bED2K ) && ! BuildHashTable() )
00076         {
00077                 if ( pFile->m_bSHA1 )
00078                 {
00079                         m_pTable->AddString( CSHA::HashToString( &pFile->m_pSHA1, TRUE ) );
00080                 }
00081                 if ( pFile->m_bED2K )
00082                 {
00083                         m_pTable->AddString( CED2K::HashToString( &pFile->m_pED2K, TRUE ) );
00084                 }
00085         }
00086 }
00087 
00088 void CLibraryDictionary::Remove(CLibraryFile* pFile)
00089 {
00090         ProcessFile( pFile, FALSE );
00091         
00092         // TODO: Always invalidate the table when removing a hashed
00093         // file... is this wise???  It will happen all the time.
00094         
00095         if ( pFile->m_bSHA1 || pFile->m_bED2K ) m_bTable = FALSE;
00096 }
00097 
00099 // CLibraryDictionary file add and remove
00100 
00101 void CLibraryDictionary::ProcessFile(CLibraryFile* pFile, BOOL bAdd)
00102 {
00103         ASSERT(pFile != NULL);
00104         ProcessPhrase( pFile, pFile->GetSearchName(), bAdd, FALSE );
00105         
00106         if ( pFile->m_pMetadata && pFile->m_pSchema )
00107         {
00108                 ProcessWord( pFile, pFile->m_pSchema->m_sURI, bAdd );
00109                 ProcessPhrase( pFile, pFile->GetMetadataWords(), bAdd );
00110         }
00111 }
00112 
00114 // CLibraryDictionary phrase parser
00115 
00116 int CLibraryDictionary::ProcessPhrase(CLibraryFile* pFile, const CString& strPhrase, BOOL bAdd, BOOL bLowercase)
00117 {
00118         LPCTSTR pszPtr = strPhrase;
00119         CString strWord;
00120         int nCount = 0;
00121         
00122     int nStart = 0, nPos = 0;
00123         for ( ; *pszPtr ; nPos++, pszPtr++ )
00124         {
00125                 if ( ! IsCharacter( *pszPtr ) )
00126                 {
00127                         if ( nStart < nPos && IsWord( strPhrase, nStart, nPos - nStart ) )
00128                         {
00129                                 strWord = strPhrase.Mid( nStart, nPos - nStart );
00130                                 if ( bLowercase ) 
00131                                 {
00132                                         CharLower( strWord.GetBuffer() );
00133                                         strWord.GetBuffer();
00134                                 }
00135                                 ProcessWord( pFile, strWord, bAdd );
00136                                 nCount++;
00137                                 
00138                                 if ( nPos - nStart >= 5 && Settings.Library.PartialMatch )
00139                                 {
00140                                         strWord = strPhrase.Mid( nStart, nPos - nStart - 1 );
00141                                         if ( bLowercase ) 
00142                                         {
00143                                                 CharLower( strWord.GetBuffer() );
00144                                                 strWord.GetBuffer();
00145                                         }
00146                                         ProcessWord( pFile, strWord, bAdd );
00147                                         nCount++;
00148                                         
00149                                         strWord = strPhrase.Mid( nStart, nPos - nStart - 2 );
00150                                         if ( bLowercase ) 
00151                                         {
00152                                                 CharLower( strWord.GetBuffer() );
00153                                                 strWord.GetBuffer();
00154                                         }
00155                                         ProcessWord( pFile, strWord, bAdd );
00156                                         nCount++;
00157                                 }
00158                         }
00159                         nStart = nPos + 1;
00160                 }
00161         }
00162         
00163         if ( nStart < nPos && IsWord( strPhrase, nStart, nPos - nStart ) )
00164         {
00165                 strWord = strPhrase.Mid( nStart, nPos - nStart );
00166                 if ( bLowercase ) 
00167                 {
00168                         CharLower( strWord.GetBuffer() );
00169                         strWord.GetBuffer();
00170                 }
00171                 ProcessWord( pFile, strWord, bAdd );
00172                 nCount++;
00173                 
00174                 if ( nPos - nStart >= 5 && Settings.Library.PartialMatch )
00175                 {
00176                         strWord = strPhrase.Mid( nStart, nPos - nStart - 1 );
00177                         if ( bLowercase ) 
00178                         {
00179                                 CharLower( strWord.GetBuffer() );
00180                                 strWord.GetBuffer();
00181                         }
00182                         ProcessWord( pFile, strWord, bAdd );
00183                         nCount++;
00184                         
00185                         strWord = strPhrase.Mid( nStart, nPos - nStart - 2 );
00186                         if ( bLowercase ) 
00187                         {
00188                                 CharLower( strWord.GetBuffer() );
00189                                 strWord.GetBuffer();
00190                         }
00191                         ProcessWord( pFile, strWord, bAdd );
00192                         nCount++;
00193                 }
00194         }
00195         
00196         return nCount;
00197 }
00198 
00200 // CLibraryDictionary word add and remove
00201 
00202 void CLibraryDictionary::ProcessWord(CLibraryFile* pFile, const CString& strWord, BOOL bAdd)
00203 {
00204         CLibraryWord* pWord;
00205         
00206         if ( m_pWords.Lookup( strWord, (void*&)pWord ) )
00207         {
00208                 if ( bAdd )
00209                 {
00210                         pWord->Add( pFile );
00211                 }
00212                 else
00213                 {
00214                         if ( ! pWord->Remove( pFile ) )
00215                         {
00216                                 m_pWords.RemoveKey( strWord );
00217                                 delete pWord;
00218                                 m_bTable = FALSE;
00219                         }
00220                 }
00221         }
00222         else if ( bAdd )
00223         {
00224                 pWord = new CLibraryWord();
00225                 pWord->Add( pFile );
00226                 m_pWords.SetAt( strWord, pWord );
00227 
00228                 if ( ! BuildHashTable() ) m_pTable->AddString( strWord );
00229         }
00230 }
00231 
00233 // CLibraryDictionary build hash table
00234 
00235 BOOL CLibraryDictionary::BuildHashTable()
00236 {
00237         if ( m_pTable == NULL )
00238         {
00239                 m_pTable = new CQueryHashTable();
00240                 m_pTable->Create();
00241         }
00242         
00243         if ( m_bTable ) return FALSE;
00244         
00245         m_pTable->Clear();
00246         
00247         //Add words to hash table
00248         for ( POSITION pos = m_pWords.GetStartPosition() ; pos ; )
00249         {
00250                 CLibraryWord* pWord;
00251                 CString strWord;
00252                 
00253                 m_pWords.GetNextAssoc( pos, strWord, (void*&)pWord );
00254                 
00255                 CLibraryFile* pFileTemp = *(pWord->m_pList); 
00256 
00257                 if ( pFileTemp->IsShared() )    // Check if the file is shared
00258                 {
00259                         if ( ( pFileTemp->IsGhost() ) || (UploadQueues.CanUpload( PROTOCOL_HTTP, pFileTemp, FALSE ) ) ) // Check if a queue exists
00260                         {
00261                                 //Add the keywords to the table
00262                                 m_pTable->AddString( strWord );
00263 /*
00264                                 CString str;
00265                                 str.Format( _T("Word Added: %s"), strWord );
00266                                 theApp.Message( MSG_DEFAULT, str );
00267                         }
00268                         else
00269                         {
00270                                 CString str;
00271                                 str.Format( _T("Word not added: %s"), strWord );
00272                                 theApp.Message( MSG_DEFAULT, str );
00273 */
00274                         }
00275                 }
00276         }
00277         
00278         //Add sha1/ed2k hashes to hash table
00279         for ( POSITION pos = LibraryMaps.GetFileIterator() ; pos ; )
00280         {
00281                 CLibraryFile* pFile = LibraryMaps.GetNextFile( pos );
00282                 
00283                 if (pFile->IsShared())  // Check if the file is shared
00284                 {               
00285                         if ( ( pFile->IsGhost() ) || ( UploadQueues.CanUpload( PROTOCOL_HTTP, pFile, FALSE ) ) ) // Check if a queue exists
00286                         {
00287                                 //Add the hashes to the table
00288                                 if ( pFile->m_bSHA1 )
00289                                 {
00290                                         m_pTable->AddString( CSHA::HashToString( &pFile->m_pSHA1, TRUE ) );
00291                                 }
00292                                 if ( pFile->m_bED2K )
00293                                 {
00294                                         m_pTable->AddString( CED2K::HashToString( &pFile->m_pED2K, TRUE ) );
00295                                 }
00296 /*
00297                                 CString str;
00298                                 str.Format( _T("File added: %s"), pFile->m_sName );
00299                                 theApp.Message( MSG_DEFAULT, str );
00300                         }
00301                         else
00302                         {
00303                                 CString str;
00304                                 str.Format( _T("File not added: %s"), pFile->m_sName );
00305                                 theApp.Message( MSG_DEFAULT, str );
00306 */
00307                         }
00308                 }
00309         }
00310         
00311         m_bTable = TRUE;
00312         
00313         return TRUE;
00314 }
00315 
00316 void CLibraryDictionary::RebuildHashTable()     //Force table to re-build. (If queues changed, etc)
00317 {
00318         m_bTable = FALSE;
00319         BuildHashTable();
00320 }
00321 
00322 
00324 // CLibraryDictionary retrieve hash table
00325 
00326 CQueryHashTable* CLibraryDictionary::GetHashTable()
00327 {
00328         CQuickLock oLock( Library.m_pSection );
00329         
00330         BuildHashTable();
00331         
00332         return m_pTable;
00333 }
00334 
00336 // CLibraryDictionary clear
00337 
00338 void CLibraryDictionary::Clear()
00339 {
00340         for ( POSITION pos = m_pWords.GetStartPosition() ; pos ; )
00341         {
00342                 CLibraryWord* pWord;
00343                 CString strWord;
00344                 
00345                 m_pWords.GetNextAssoc( pos, strWord, (void*&)pWord );
00346                 delete pWord;
00347         }
00348         
00349         m_pWords.RemoveAll();
00350         
00351         if ( m_pTable != NULL )
00352         {
00353                 m_pTable->Clear();
00354                 m_bTable = TRUE;
00355         }
00356 }
00357 
00359 // CLibraryDictionary search
00360 
00361 CPtrList* CLibraryDictionary::Search(CQuerySearch* pSearch, int nMaximum, BOOL bLocal)
00362 {
00363         BuildHashTable();
00364         
00365         // Only check the hash when a search comes from other client. 
00366         if ( ! bLocal && ! m_pTable->Check( pSearch ) ) return NULL;
00367         
00368         DWORD nCookie = m_nSearchCookie++;
00369         
00370         CLibraryFile* pHit = NULL;
00371         
00372         LPCTSTR* pWordPtr       = pSearch->m_pWordPtr;
00373         DWORD* pWordLen         = pSearch->m_pWordLen;
00374         
00375         for ( int nWord = pSearch->m_nWords ; nWord > 0 ; nWord--, pWordPtr++, pWordLen++ )
00376         {
00377                 if ( **pWordPtr == '-' ) continue;
00378                 
00379                 LPTSTR pszNull = (LPTSTR)(*pWordPtr) + *pWordLen;
00380                 TCHAR cNull = *pszNull;
00381                 *pszNull = 0;
00382                 
00383                 CLibraryWord* pWord;
00384                 
00385                 if ( m_pWords.Lookup( *pWordPtr, (void*&)pWord ) )
00386                 {
00387                         CLibraryFile** pFiles   = pWord->m_pList;
00388                         CLibraryFile* pLastFile = NULL;
00389                         
00390                         for ( DWORD nFileCount = pWord->m_nCount ; nFileCount ; nFileCount--, pFiles++ )
00391                         {
00392                                 CLibraryFile* pFile = *pFiles;
00393                                 
00394                                 if ( pFile == pLastFile ) continue;
00395                                 pLastFile = pFile;
00396                                 
00397                                 if ( ! bLocal && ! pFile->IsShared() ) continue;
00398                                 
00399                                 if ( pFile->m_nSearchCookie == nCookie )
00400                                 {
00401                                         pFile->m_nSearchWords ++;
00402                                 }
00403                                 else
00404                                 {
00405                                         pFile->m_nSearchCookie  = nCookie;
00406                                         pFile->m_nSearchWords   = 1;
00407                                         pFile->m_pNextHit               = pHit;
00408                                         pHit = pFile;
00409                                 }
00410                         }
00411                 }
00412                 
00413                 *pszNull = cNull;
00414         }
00415         
00416         DWORD nLowerBound = pSearch->m_nWords >= 3 ? pSearch->m_nWords * 2 / 3 : pSearch->m_nWords;
00417         
00418         CPtrList* pHits = NULL;
00419         int nCount = 0;
00420         
00421         for ( ; pHit ; pHit = pHit->m_pNextHit )
00422         {
00423                 if ( pHit->m_nSearchCookie == nCookie && pHit->m_nSearchWords >= nLowerBound )
00424                 {
00425                         if ( pSearch->Match( pHit->GetSearchName(), pHit->m_nSize,
00426                                         pHit->m_pSchema ? (LPCTSTR)pHit->m_pSchema->m_sURI : NULL,
00427                                         pHit->m_pMetadata,
00428                                         pHit->m_bSHA1 ? &pHit->m_pSHA1 : NULL,
00429                                         pHit->m_bTiger ? &pHit->m_pTiger : NULL,
00430                                         pHit->m_bED2K ? &pHit->m_pED2K : NULL ) )
00431                         {
00432                                 if ( ! pHits ) pHits = new CPtrList();
00433                                 pHits->AddTail( pHit );
00434                                 
00435                                 if ( ! bLocal )
00436                                 {
00437                                         pHit->m_nHitsToday++;
00438                                         pHit->m_nHitsTotal++;
00439                                 }
00440                                 
00441                                 if ( pHit->m_nCollIndex )
00442                                 {
00443                                         if ( CLibraryFile* pCollection = LibraryMaps.LookupFile( pHit->m_nCollIndex, ! bLocal, TRUE ) )
00444                                         {
00445                                                 if ( pCollection->m_nSearchCookie != nCookie )
00446                                                 {
00447                                                         pCollection->m_nSearchCookie = nCookie;
00448                                                         pHits->AddHead( pCollection );
00449                                                 }
00450                                         }
00451                                         else
00452                                         {
00453                                                 pHit->m_nCollIndex = 0;
00454                                         }
00455                                 }
00456                                 
00457                                 if ( nMaximum && ++nCount >= nMaximum ) break;
00458                         }
00459                 }
00460         }
00461         
00462         return pHits;
00463 }
00464 
00465 
00467 // CLibraryWord construction
00468 
00469 CLibraryWord::CLibraryWord()
00470 {
00471         m_pList         = NULL;
00472         m_nCount        = 0;
00473 }
00474 
00475 CLibraryWord::~CLibraryWord()
00476 {
00477         if ( m_pList ) delete [] m_pList;
00478 }
00479 
00481 // CLibraryWord add and remove
00482 
00483 void CLibraryWord::Add(CLibraryFile* pFile)
00484 {
00485         CLibraryFile** pList = new CLibraryFile*[ m_nCount + 1 ];
00486         
00487         if ( m_pList )
00488         {
00489                 CopyMemory( pList, m_pList, m_nCount * sizeof(CLibraryFile*) );
00490                 delete [] m_pList;
00491         }
00492         
00493         m_pList = pList;
00494         m_pList[ m_nCount++ ] = pFile;
00495 }
00496 
00497 BOOL CLibraryWord::Remove(CLibraryFile* pFile)
00498 {
00499         CLibraryFile** pSearch = m_pList;
00500         
00501         for ( DWORD nSearch = m_nCount ; nSearch ; nSearch--, pSearch++ )
00502         {
00503                 if ( *pSearch == pFile )
00504                 {
00505                         for ( m_nCount--, nSearch-- ; nSearch ; nSearch--, pSearch++ )
00506                         {
00507                                 *pSearch = pSearch[1];
00508                         }
00509                         break;
00510                 }
00511         }
00512         
00513         return ( m_nCount > 0 );
00514 }
00515 

Generated on Thu Dec 15 10:39:44 2005 for Shareaza 2.2.1.0 by  doxygen 1.4.2