/*
   Memory utility functions
   Written by Arve Holmbo.
   Released to public domain March 1996.
*/

#include <stdlib.h>

#include "pragmas.h"
#include "mem.h"


/* memcpy sibling for "transparent" copies.  Every non-zero byte of the
   source is copied to the same offset in the destination; wherever the
   source holds a zero byte the destination byte is left untouched.
   The function interface is identical to memcpy().

   dest  - buffer receiving the non-zero bytes (at least len bytes)
   src   - buffer to read from (at least len bytes)
   len   - number of bytes to examine
   Returns dest.
*/
void  *MEMtcpy( void *dest, const void *src, size_t len )
{
  char       *d = (char *)dest;
  const char *s = (const char *)src;
  size_t      i;   /* same type as len: no signed/unsigned compare, no overflow */

  for ( i = 0; i < len; i++ )
  {
    if ( s[i] )
      d[i] = s[i];
  }
  return dest;
}





/* Attempt to use Pentium's floating point processor to speed up
   memory copies: each 8-byte unit is loaded with fild and stored with
   fistp, working from the last unit of the buffer down to the first.
   This probably will work best when the dest buffer is not in the
   cache (never mind this on a Pentium Pro).

   Requires:  len must be divisible by 8.  Only len / 8 whole units are
              copied, so any trailing len % 8 bytes are left untouched
              and a len below 8 copies nothing.
   Returns:   dest.
   */
void  *MEMcopyF( void *dest, const void *src, size_t len )
{
  extern void _memc( void *dest, const void *src, size_t len8 );
  /* In-line body for _memc.  The unit count must arrive in eax because
     that is the register the loop decrements and indexes with; "dec"
     sets the zero flag that "jnz" tests (the x87 instructions leave
     EFLAGS alone), so the loop ends right after unit 0 is copied. */
#pragma inline _memc;
#pragma aux    _memc = \
"over:"\
  "dec   eax"\
  "fild  qword [esi+eax*8]"\
  "fistp qword [edi+eax*8]"\
  "jnz   over"\
  parm [edi][esi][eax]\
  modify [eax];

  size_t len8 = len >> 3;   /* number of 8-byte units */

  /* A zero count would make "dec eax" wrap to 0xFFFFFFFF and run wild. */
  if ( len8 != 0 )
    _memc( dest, src, len8 );
  return  dest;
}



/* Attempt to make a fast integer-based memcpy.  Data is moved in
   16-byte blocks (four 32-bit moves per loop pass), starting with the
   last block of the buffer and working down to the first.

   Requires:  the number of bytes to copy must be divisible by 16.
              Only len / 16 whole blocks are copied, so any trailing
              len % 16 bytes are left untouched and a len below 16
              copies nothing.
   Returns:   dest.
*/
void  *MEMcopy( void *dest, const void *src, size_t len )
{
  extern void _memc4( void *dest, const void *src, size_t len4 );
  /* In-line body for _memc4.  ebx enters as the dword count and is
     turned into the index of the last dword; each pass copies dwords
     ebx, ebx-1, ebx-2, ebx-3 and then steps ebx down by 4.  In the final
     pass ebx is 3, so the subtraction borrows and "jnc" falls through.
     (Testing for zero cannot work here: ebx is always 3 modulo 4.) */
#pragma inline _memc4;
#pragma aux    _memc4 = \
  "mov  edx, 4"\
  "dec  ebx"\
"over:"\
  "mov  eax, [esi+ebx*4]"\
  "mov  ecx, [edi+ebx*4]" /* Load cache line for destination */\
  "mov  [edi+ebx*4], eax"\
\
  "mov  eax, [esi+ebx*4-4]"\
  "mov  [edi+ebx*4-4], eax"\
\
  "mov  eax, [esi+ebx*4-8]"\
  "mov  [edi+ebx*4-8], eax"\
\
  "mov  eax, [esi+ebx*4-12]"\
  "mov  [edi+ebx*4-12], eax"\
\
  "sub  ebx, edx"\
  "jnc  over"     /* 16 bytes copied; loop until the index borrows */\
  parm [edi][esi][ebx]\
  modify [ebx eax ecx edx];

  /* Dword count rounded down to whole 16-byte blocks, so the loop can
     never index below the start of the buffers. */
  size_t len4 = ( len >> 4 ) << 2;

  /* With a zero count the first "dec ebx" would wrap around. */
  if ( len4 != 0 )
    _memc4( dest, src, len4 );
  return  dest;
}


/* Attempt to make a fast integer-based memcpy, variant 2.  Instead of
   using fixed displacements, the dword index in ebx is decremented
   before each of the four 32-bit moves in a loop pass.  Data is copied
   from the last dword of the buffer down to the first.

   Requires:  the number of bytes to copy must be divisible by 16.
              Only len / 16 whole blocks are copied, so any trailing
              len % 16 bytes are left untouched and a len below 16
              copies nothing.
   Returns:   dest.
*/
void  *MEMcopy2( void *dest, const void *src, size_t len )
{
  /* The helper has its own name (_memc4b) so that its auxiliary pragma
     does not share a symbol with the different _memc4 body used by
     MEMcopy. */
  extern void _memc4b( void *dest, const void *src, size_t len4 );
  /* In-line body.  ebx enters as the dword count.  "mov" does not touch
     the flags, so the closing "jnz" tests the result of the fourth
     "dec ebx"; it reaches zero exactly when dword 0 has been copied,
     provided the count is a non-zero multiple of 4. */
#pragma inline _memc4b;
#pragma aux    _memc4b = \
"over:"\
  "dec  ebx"\
  "mov  eax, [esi+ebx*4]"\
  "mov  ecx, [edi+ebx*4]" /* Load cache line for destination */\
  "mov  [edi+ebx*4], eax"\
\
  "dec  ebx"\
  "mov  eax, [esi+ebx*4]"\
  "mov  [edi+ebx*4], eax"\
\
  "dec  ebx"\
  "mov  eax, [esi+ebx*4]"\
  "mov  [edi+ebx*4], eax"\
\
  "dec  ebx"\
  "mov  eax, [esi+ebx*4]"\
  "mov  [edi+ebx*4], eax"\
\
  "jnz  over"     /* 16 bytes copied */\
  parm [edi][esi][ebx]\
  modify [ebx eax ecx];

  /* Dword count rounded down to whole 16-byte blocks; any other count
     would step past zero without ever hitting it. */
  size_t len4 = ( len >> 4 ) << 2;

  /* With a zero count the first "dec ebx" would wrap around. */
  if ( len4 != 0 )
    _memc4b( dest, src, len4 );
  return  dest;
}
