Main Page | Modules | Class Hierarchy | Class List | Directories | File List | Class Members | File Members | Related Pages

memcpyaltivec.c

00001 /*****************************************************************************
00002  * memcpyaltivec.c : AltiVec memcpy module
00003  *****************************************************************************
00004  * Copyright (C) 2001 the VideoLAN team
00005  * $Id: memcpyaltivec.c 11664 2005-07-09 06:17:09Z courmisch $
00006  *
00007  * Author: Christophe Massiot <[email protected]>
00008  *
00009  * This program is free software; you can redistribute it and/or modify
00010  * it under the terms of the GNU General Public License as published by
00011  * the Free Software Foundation; either version 2 of the License, or
00012  * (at your option) any later version.
00013  *
00014  * This program is distributed in the hope that it will be useful,
00015  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00016  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00017  * GNU General Public License for more details.
00018  *
00019  * You should have received a copy of the GNU General Public License
00020  * along with this program; if not, write to the Free Software
00021  * Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111, USA.
00022  *****************************************************************************/
00023 
00024 #ifndef __BUILD_ALTIVEC_ASM__
00025 
00026 /*****************************************************************************
00027  * Preamble
00028  *****************************************************************************/
00029 #include <stdlib.h>
00030 #include <string.h>
00031 
00032 #include <vlc/vlc.h>
00033 
00034 #ifdef HAVE_ALTIVEC_H
00035 #   include <altivec.h>
00036 #endif
00037 
00038 /*****************************************************************************
00039  * Local prototypes.
00040  *****************************************************************************/
/* Forward declaration: Activate() stores this function in pf_memcpy. The
 * definition appears further down, either written with AltiVec intrinsics
 * (CAN_COMPILE_C_ALTIVEC) or as pre-generated assembly. */
00041 static void * fast_memcpy ( void * to, const void * from, size_t len );
00042 
00043 /*****************************************************************************
00044  * Module initializer.
00045  *****************************************************************************/
00046 static int Activate ( vlc_object_t *p_this )
00047 {
00048     p_this->p_vlc->pf_memcpy = fast_memcpy;
00049     return VLC_SUCCESS;
00050 }
00051 
00052 /*****************************************************************************
00053  * Module descriptor.
00054  *****************************************************************************/
/* Descriptor for this plugin, expressed with the module macros pulled in by
 * <vlc/vlc.h>. */
00055 vlc_module_begin();
00056     set_description( _("AltiVec memcpy") );
00057     set_category( CAT_ADVANCED );
00058     set_subcategory( SUBCAT_ADVANCED_MISC );
    /* NOTE(review): presumably limits this module to CPUs that report
     * AltiVec support -- confirm against the module API. */
00059     add_requirement( ALTIVEC );
    /* Declares the "memcpy" capability with value 100.
     * NOTE(review): 100 is presumably the score used to choose between
     * competing "memcpy" modules -- confirm. */
00060     set_capability( "memcpy", 100 );
    /* Activate is the first callback; the second one is left unset (NULL).
     * NOTE(review): presumably these are the open/close hooks -- confirm. */
00061     set_callbacks( Activate, NULL );
00062     add_shortcut( "altivec" );
00063 vlc_module_end();
00064 
00065 #else
/* When __BUILD_ALTIVEC_ASM__ is defined, the #ifndef branch above (and with
 * it <stdlib.h> / <string.h>) is skipped, so size_t is declared locally.
 * NOTE(review): assumes size_t is unsigned long on the target -- confirm. */
00066 typedef unsigned long size_t;
00067 #endif /* __BUILD_ALTIVEC_ASM__ */
00068 
00069 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
00070 
/* Shorthand names for AltiVec vector types. Of these, only vector_u8_t is
 * used by the code visible in this file. */
00071 #define vector_s16_t vector signed short
00072 #define vector_u16_t vector unsigned short
00073 #define vector_s8_t vector signed char
00074 #define vector_u8_t vector unsigned char
00075 #define vector_s32_t vector signed int
00076 #define vector_u32_t vector unsigned int
/* Block size in bytes: fast_memcpy aligns the destination to this boundary
 * and its vector loop advances both pointers by 16 bytes per iteration. */
00077 #define MMREG_SIZE 16
00078 
/*
 * SMALL_MEMCPY: copy len bytes from `from` to `to`, one byte at a time.
 *
 * `to` and `from` must be modifiable byte-pointer lvalues. Both are advanced
 * past the copied region, and callers rely on that to carry on from where
 * the macro stopped. `len` is evaluated exactly once; a length of 0 copies
 * nothing and leaves both pointers untouched.
 *
 * The body is wrapped in do { } while( 0 ) so that "SMALL_MEMCPY( ... );"
 * is a single statement and remains valid in an unbraced if/else.
 */
#define SMALL_MEMCPY(to, from, len)                                         \
do                                                                          \
{                                                                           \
    unsigned char * const small_memcpy_end = (to) + (len);                  \
    while( (to) < small_memcpy_end )                                        \
    {                                                                       \
        *(to)++ = *(from)++;                                                \
    }                                                                       \
} while( 0 )
00087 
00088 static void * fast_memcpy( void * _to, const void * _from, size_t len )
00089 {
00090     void * retval = _to;
00091     unsigned char * to = (unsigned char *)_to;
00092     unsigned char * from = (unsigned char *)_from;
00093 
00094     if( len > 16 )
00095     {
00096         /* Align destination to MMREG_SIZE -boundary */
00097         register unsigned long int delta;
00098 
00099         delta = ((unsigned long)to)&(MMREG_SIZE-1);
00100         if( delta )
00101         {
00102             delta = MMREG_SIZE - delta;
00103             len -= delta;
00104             SMALL_MEMCPY(to, from, delta);
00105         }
00106 
00107         if( len & ~(MMREG_SIZE-1) )
00108         {
00109             vector_u8_t perm, ref0, ref1, tmp;
00110 
00111             perm = vec_lvsl( 0, from );
00112             ref0 = vec_ld( 0, from );
00113             ref1 = vec_ld( 15, from );
00114             from += 16;
00115             len -= 16;
00116             tmp = vec_perm( ref0, ref1, perm );
00117             while( len & ~(MMREG_SIZE-1) )
00118             {
00119                 ref0 = vec_ld( 0, from );
00120                 ref1 = vec_ld( 15, from );
00121                 from += 16;
00122                 len -= 16;
00123                 vec_st( tmp, 0, to );
00124                 tmp = vec_perm( ref0, ref1, perm );
00125                 to += 16;
00126             }
00127             vec_st( tmp, 0, to );
00128             to += 16;
00129         }
00130     }
00131 
00132     if( len )
00133     {
00134         SMALL_MEMCPY( to, from, len );
00135     }
00136 
00137     return retval;
00138 }
00139 
00140 #endif
00141 
00142 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
00143 
00144 /*
00145  * The asm code is generated with:
00146  *
00147  * gcc-2.95 -fvec -D__BUILD_ALTIVEC_ASM__ -O9 -fomit-frame-pointer -mregnames -S memcpyaltivec.c
00148  *
00149  * sed 's/.L/._L/g' memcpyaltivec.s |
00150  * awk '{args=""; len=split ($2, arg, ",");
00151  *      for (i=1; i<=len; i++) { a=arg[i]; if (i<len) a=a",";
00152  *                               args = args sprintf ("%-6s", a) }
00153  *      printf ("\t\"\t%-16s%-24s\\n\"\n", $1, args) }' |
00154  * unexpand -a
00155  */
00156 
/*
 * fast_memcpy, pre-generated assembly variant.
 *
 * Compiled only when neither CAN_COMPILE_C_ALTIVEC nor __BUILD_ALTIVEC_ASM__
 * is defined. The body is a single asm statement without operand or clobber
 * lists, and the function has no C return statement: the code reads %r3, %r4
 * and %r5 directly and leaves either through bclr or by running off the end
 * of the asm block.
 *
 * NOTE(review): this presumably relies on the PowerPC calling convention
 * passing _to, _from and len in r3, r4 and r5 and returning r3 (which the
 * code never modifies), and on the compiler not emitting a prologue that
 * disturbs them -- confirm for the target ABI and compiler.
 * NOTE(review): the compares are word-sized (cmplwi/cmplw/cmpwi), so this
 * looks 32-bit only -- confirm.
 */
00157 static void * fast_memcpy( void * _to, const void * _from, size_t len )
00158 {
00159     asm ("                                              \n"
        /* len <= 16: go straight to the byte tail at ._L3.
         * %r9 becomes the running destination pointer. */
00160         "       cmplwi          %cr0, %r5,  16          \n"
00161         "       mr              %r9,  %r3               \n"
00162         "       bc              4,    1,    ._L3        \n"
        /* Destination already 16-byte aligned: skip to ._L4. Otherwise
         * %r11 = first aligned address and len is reduced accordingly. */
00163         "       andi.           %r0,  %r3,  15          \n"
00164         "       bc              12,   2,    ._L4        \n"
00165         "       subfic          %r0,  %r0,  16          \n"
00166         "       add             %r11, %r3,  %r0         \n"
00167         "       cmplw           %cr0, %r3,  %r11        \n"
00168         "       subf            %r5,  %r0,  %r5         \n"
00169         "       bc              4,    0,    ._L4        \n"
        /* ._L7: byte loop until the destination reaches %r11. */
00170         "       ._L7:                                   \n"
00171         "       lbz             %r0,  0(%r4)            \n"
00172         "       stb             %r0,  0(%r9)            \n"
00173         "       addi            %r9,  %r9,  1           \n"
00174         "       cmplw           %cr0, %r9,  %r11        \n"
00175         "       addi            %r4,  %r4,  1           \n"
00176         "       bc              12,   0,    ._L7        \n"
        /* ._L4: if len with its low four bits cleared is zero, no whole
         * 16-byte block is left: go to the tail. Otherwise build the permute
         * mask (%v12) and assemble the first block into %v13. */
00177         "       ._L4:                                   \n"
00178         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
00179         "       bc              12,   2,    ._L3        \n"
00180         "       addi            %r5,  %r5,  -16         \n"
00181         "       li              %r11, 15                \n"
00182         "       lvsl            %v12, 0,    %r4         \n"
00183         "       lvx             %v1,  0,    %r4         \n"
00184         "       lvx             %v0,  %r11, %r4         \n"
00185         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
00186         "       vperm           %v13, %v1,  %v0,  %v12  \n"
00187         "       addi            %r4,  %r4,  16          \n"
00188         "       bc              12,   2,    ._L11       \n"
        /* ._L12: vector loop -- load the next block, store the previous
         * one (%v13), then assemble the new one. */
00189         "       ._L12:                                  \n"
00190         "       addi            %r5,  %r5,  -16         \n"
00191         "       li              %r11, 15                \n"
00192         "       lvx             %v1,  0,    %r4         \n"
00193         "       lvx             %v0,  %r11, %r4         \n"
00194         "       rlwinm.         %r0,  %r5,  0,    0,    27    \n"
00195         "       stvx            %v13, 0,    %r9         \n"
00196         "       vperm           %v13, %v1,  %v0,  %v12  \n"
00197         "       addi            %r4,  %r4,  16          \n"
00198         "       addi            %r9,  %r9,  16          \n"
00199         "       bc              4,    2,    ._L12       \n"
        /* ._L11: store the block assembled last. */
00200         "       ._L11:                                  \n"
00201         "       stvx            %v13, 0,    %r9         \n"
00202         "       addi            %r9,  %r9,  16          \n"
        /* ._L3: byte tail. Return at once if nothing is left; otherwise
         * %r5 becomes the end address for the ._L17 byte loop. */
00203         "       ._L3:                                   \n"
00204         "       cmpwi           %cr0, %r5,  0           \n"
00205         "       bclr            12,   2                 \n"
00206         "       add             %r5,  %r9,  %r5         \n"
00207         "       cmplw           %cr0, %r9,  %r5         \n"
00208         "       bclr            4,    0                 \n"
00209         "       ._L17:                                  \n"
00210         "       lbz             %r0,  0(%r4)            \n"
00211         "       stb             %r0,  0(%r9)            \n"
00212         "       addi            %r9,  %r9,  1           \n"
00213         "       cmplw           %cr0, %r9,  %r5         \n"
00214         "       addi            %r4,  %r4,  1           \n"
00215         "       bc              12,   0,    ._L17       \n"
00216         );
00217 }
00218 
00219 #endif

Generated on Tue Dec 20 10:14:48 2005 for vlc-0.8.4a by  doxygen 1.4.2