00001
00002
00003
00004
00005
00006
00007
00008
00009
00010
00011
00012
00013
00014
00015
00016
00017
00018
00019
00020
00021
00022
00023
00024 #ifndef __BUILD_ALTIVEC_ASM__
00025
00026
00027
00028
00029 #include <stdlib.h>
00030 #include <string.h>
00031
00032 #include <vlc/vlc.h>
00033
00034 #ifdef HAVE_ALTIVEC_H
00035 # include <altivec.h>
00036 #endif
00037
00038
00039
00040
00041 static void * fast_memcpy ( void * to, const void * from, size_t len );
00042
00043
00044
00045
/* Module activation callback: installs the AltiVec implementation as the
 * libvlc-wide memcpy routine.  Cannot fail.
 * NOTE(review): no deactivation callback restores the previous pf_memcpy;
 * presumably the core only unloads this at shutdown -- confirm. */
static int Activate ( vlc_object_t *p_this )
{
    /* From here on, every pf_memcpy call in this libvlc instance goes
     * through fast_memcpy below. */
    p_this->p_vlc->pf_memcpy = fast_memcpy;
    return VLC_SUCCESS;
}
00051
00052
00053
00054
/* Module descriptor: registers this plugin as a "memcpy" capability
 * provider with priority 100.  add_requirement( ALTIVEC ) keeps the
 * plugin from being loaded on CPUs without AltiVec; the "altivec"
 * shortcut lets users select it explicitly. */
vlc_module_begin();
    set_description( _("AltiVec memcpy") );
    set_category( CAT_ADVANCED );
    set_subcategory( SUBCAT_ADVANCED_MISC );
    add_requirement( ALTIVEC );
    set_capability( "memcpy", 100 );
    set_callbacks( Activate, NULL );
    add_shortcut( "altivec" );
vlc_module_end();
00064
00065 #else
00066 typedef unsigned long size_t;
00067 #endif
00068
00069 #if defined(CAN_COMPILE_C_ALTIVEC) || defined( __BUILD_ALTIVEC_ASM__ )
00070
/* Shorthand aliases for the AltiVec vector types (the bare
 * "vector ..." type keywords only exist when AltiVec is enabled). */
#define vector_s16_t vector signed short
#define vector_u16_t vector unsigned short
#define vector_s8_t vector signed char
#define vector_u8_t vector unsigned char
#define vector_s32_t vector signed int
#define vector_u32_t vector unsigned int
/* Width in bytes of one AltiVec register; also the alignment unit the
 * vector copy loop below works in. */
#define MMREG_SIZE 16
00078
/* Byte-wise copy used for the unaligned prologue and sub-16-byte tail.
 * Deliberately ADVANCES the caller's `to` and `from` pointer variables --
 * fast_memcpy relies on that side effect between the prologue and the
 * vector loop.  Wrapped in do { } while(0) so the expansion is a single
 * statement (safe in an unbraced if/else), and every argument use is
 * parenthesized against operator-precedence surprises. */
#define SMALL_MEMCPY(to, from, len)                                         \
do                                                                          \
{                                                                           \
    unsigned char * end = (to) + (len);                                     \
    while( (to) < end )                                                     \
    {                                                                       \
        *(to)++ = *(from)++;                                                \
    }                                                                       \
} while(0)
00087
/*
 * AltiVec-accelerated memcpy with the libc memcpy contract: copies len
 * bytes from _from to _to (regions must not overlap) and returns the
 * original destination pointer.
 *
 * Strategy: byte-copy until the destination is 16-byte aligned, stream
 * 16-byte vectors through vec_perm to realign the (possibly unaligned)
 * source, then byte-copy any tail.
 *
 * NOTE(review): vec_ld rounds its effective address down to a 16-byte
 * boundary, so the vec_ld( 15, from ) reads may touch bytes past
 * from + len (within the same aligned 16-byte granule).  This is the
 * standard AltiVec copy technique and presumably assumed harmless on
 * the target platform -- confirm.
 */
static void * fast_memcpy( void * _to, const void * _from, size_t len )
{
    void * retval = _to;
    unsigned char * to = (unsigned char *)_to;
    /* const is cast away only so SMALL_MEMCPY can advance the pointer;
     * the source bytes are never written. */
    unsigned char * from = (unsigned char *)_from;

    if( len > 16 )
    {
        /* Bring the DESTINATION up to 16-byte alignment; the aligned
         * vec_st stores below depend on it. */
        register unsigned long int delta;

        delta = ((unsigned long)to)&(MMREG_SIZE-1);
        if( delta )
        {
            delta = MMREG_SIZE - delta;
            len -= delta;
            SMALL_MEMCPY(to, from, delta);  /* advances to and from */
        }

        /* At least one full 16-byte chunk left? */
        if( len & ~(MMREG_SIZE-1) )
        {
            vector_u8_t perm, ref0, ref1, tmp;

            /* perm encodes the source misalignment; vec_perm uses it to
             * splice each aligned load pair into one contiguous vector. */
            perm = vec_lvsl( 0, from );
            /* Software pipeline: load chunk i, then store chunk i-1 while
             * loading chunk i -- keep this load/store ordering intact. */
            ref0 = vec_ld( 0, from );
            ref1 = vec_ld( 15, from );
            from += 16;
            len -= 16;
            tmp = vec_perm( ref0, ref1, perm );
            while( len & ~(MMREG_SIZE-1) )
            {
                ref0 = vec_ld( 0, from );
                ref1 = vec_ld( 15, from );
                from += 16;
                len -= 16;
                vec_st( tmp, 0, to );           /* store previous chunk */
                tmp = vec_perm( ref0, ref1, perm );
                to += 16;
            }
            /* Drain the pipeline: one realigned chunk is still pending. */
            vec_st( tmp, 0, to );
            to += 16;
        }
    }

    /* Tail of 0..15 bytes (or the whole copy when len <= 16). */
    if( len )
    {
        SMALL_MEMCPY( to, from, len );
    }

    return retval;
}
00139
00140 #endif
00141
00142 #if !defined(CAN_COMPILE_C_ALTIVEC) && !defined(__BUILD_ALTIVEC_ASM__)
00143
00144
00145
00146
00147
00148
00149
00150
00151
00152
00153
00154
00155
00156
/*
 * Pre-generated PowerPC/AltiVec assembly version of fast_memcpy, used
 * when the compiler cannot handle the C intrinsics above
 * (!CAN_COMPILE_C_ALTIVEC).  Register use follows the PPC SysV ABI
 * argument layout: r3 = _to, r4 = _from, r5 = len.  r9 is the running
 * destination cursor so r3 -- the return value -- stays untouched.
 *
 * NOTE(review): the asm block has no input/output/clobber constraints
 * and several paths return straight from the asm via bclr; when control
 * falls off the end there is no C `return` statement either.  It relies
 * entirely on the compiler leaving the arguments in r3-r5 and r3 intact
 * as the return value, so it only works as a non-inlined function built
 * without reordering -- treat as fragile, do not edit the instructions.
 */
static void * fast_memcpy( void * _to, const void * _from, size_t len )
{
    asm ("                                              \n"
        /* len <= 16? skip the vector path entirely */
"   cmplwi %cr0, %r5, 16        \n"
"   mr %r9, %r3             \n"     /* r9 = dest cursor, keep r3 = retval */
"   bc 4, 1, ._L3           \n"
        /* align destination to 16 bytes (mirrors the `delta` prologue) */
"   andi. %r0, %r3, 15      \n"
"   bc 12, 2, ._L4          \n"
"   subfic %r0, %r0, 16     \n"
"   add %r11, %r3, %r0      \n"
"   cmplw %cr0, %r3, %r11       \n"
"   subf %r5, %r0, %r5      \n"
"   bc 4, 0, ._L4           \n"
"   ._L7:                   \n"     /* byte loop: copy until aligned */
"   lbz %r0, 0(%r4)         \n"
"   stb %r0, 0(%r9)         \n"
"   addi %r9, %r9, 1            \n"
"   cmplw %cr0, %r9, %r11       \n"
"   addi %r4, %r4, 1            \n"
"   bc 12, 0, ._L7          \n"
"   ._L4:                   \n"     /* any full 16-byte chunks left? */
"   rlwinm. %r0, %r5, 0, 0, 27  \n"     /* r0 = len & ~15 */
"   bc 12, 2, ._L3          \n"
        /* pipeline prime: lvsl permute vector + first realigned chunk */
"   addi %r5, %r5, -16      \n"
"   li %r11, 15             \n"
"   lvsl %v12, 0, %r4           \n"
"   lvx %v1, 0, %r4         \n"
"   lvx %v0, %r11, %r4      \n"
"   rlwinm. %r0, %r5, 0, 0, 27  \n"
"   vperm %v13, %v1, %v0, %v12  \n"
"   addi %r4, %r4, 16           \n"
"   bc 12, 2, ._L11         \n"
"   ._L12:                  \n"     /* main loop: load next, store previous */
"   addi %r5, %r5, -16      \n"
"   li %r11, 15             \n"
"   lvx %v1, 0, %r4         \n"
"   lvx %v0, %r11, %r4      \n"
"   rlwinm. %r0, %r5, 0, 0, 27  \n"
"   stvx %v13, 0, %r9           \n"
"   vperm %v13, %v1, %v0, %v12  \n"
"   addi %r4, %r4, 16           \n"
"   addi %r9, %r9, 16           \n"
"   bc 4, 2, ._L12          \n"
"   ._L11:                  \n"     /* drain: one chunk still pending */
"   stvx %v13, 0, %r9           \n"
"   addi %r9, %r9, 16           \n"
"   ._L3:                   \n"     /* tail of 0..15 bytes */
"   cmpwi %cr0, %r5, 0      \n"
"   bclr 12, 2              \n"     /* return if nothing left (r3 = _to) */
"   add %r5, %r9, %r5           \n"
"   cmplw %cr0, %r9, %r5        \n"
"   bclr 4, 0               \n"
"   ._L17:                  \n"     /* byte loop for the tail */
"   lbz %r0, 0(%r4)         \n"
"   stb %r0, 0(%r9)         \n"
"   addi %r9, %r9, 1            \n"
"   cmplw %cr0, %r9, %r5        \n"
"   addi %r4, %r4, 1            \n"
"   bc 12, 0, ._L17         \n"
        );
}
00218
00219 #endif