427 lines
12 KiB
C
427 lines
12 KiB
C
/*-
|
|
* Copyright (c) 1990 The Regents of the University of California.
|
|
* All rights reserved.
|
|
*
|
|
* This code is derived from software contributed to Berkeley by
|
|
* the Systems Programming Group of the University of Utah Computer
|
|
* Science Department.
|
|
*
|
|
* Redistribution and use in source and binary forms are permitted
|
|
* provided that: (1) source distributions retain this entire copyright
|
|
* notice and comment, and (2) distributions including binaries display
|
|
* the following acknowledgement: ``This product includes software
|
|
* developed by the University of California, Berkeley and its contributors''
|
|
* in the documentation or other materials provided with the distribution
|
|
* and in all advertising materials mentioning features or use of this
|
|
* software. Neither the name of the University nor the names of its
|
|
* contributors may be used to endorse or promote products derived
|
|
* from this software without specific prior written permission.
|
|
* THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR
|
|
* IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED
|
|
* WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE.
|
|
*/
|
|
|
|
#ifndef mc68000
|
|
#include <string.h>
|
|
|
|
#ifdef __PPC__
|
|
|
|
/* This is a _fast_ block move routine for PowerPC - Piru */
|
|
|
|
#define USE_INDEX 1
|
|
#define USE_READ8WRITE8 1
|
|
#define USE_DOUBLECOPY 0 /* experimental, need to be tested */
|
|
|
|
void bcopy(const void *s1, void *s2, size_t n)
|
|
{
|
|
typedef unsigned char u8;
|
|
typedef unsigned int LTYPE;
|
|
const size_t LSIZE = sizeof(LTYPE);
|
|
const size_t LMASK = sizeof(LTYPE) - 1;
|
|
|
|
size_t m, o;
|
|
|
|
if (!n)
|
|
return;
|
|
|
|
if (s2 < s1)
|
|
{
|
|
if (n >= LSIZE * 4)
|
|
{
|
|
m = (size_t) s1 & LMASK;
|
|
if (m && ((size_t) s2 & LMASK) == m)
|
|
{
|
|
n -= m;
|
|
do
|
|
*((u8 *)s2)++ = *((u8 *)s1)++;
|
|
while (--m);
|
|
}
|
|
#if USE_DOUBLECOPY
|
|
/* If source is aligned by 4, destionation is aligned by 4 and not 8 */
|
|
if (!((size_t) s1 & LMASK) && !((size_t) s2 & LMASK) && ((size_t) s2 & LSIZE))
|
|
{
|
|
n -= LSIZE;
|
|
*((LTYPE *)s2)++ = *((LTYPE *)s1)++;
|
|
}
|
|
#endif
|
|
m = n / LSIZE;
|
|
n &= LMASK;
|
|
if (m >= 8)
|
|
{
|
|
o = m / 8;
|
|
m &= 7;
|
|
#if USE_INDEX
|
|
|
|
#if USE_DOUBLECOPY
|
|
if (!((size_t) s1 & LMASK) && !((size_t) s2 & (sizeof(double) - 1)))
|
|
{
|
|
do
|
|
{
|
|
#if USE_READ8WRITE8
|
|
|
|
register double r0,r1,r2,r3;
|
|
|
|
r0 = ((double *)s1)[0];
|
|
r1 = ((double *)s1)[1];
|
|
r2 = ((double *)s1)[2];
|
|
((double *)s1) += 4;
|
|
r3 = ((double *)s1)[-1];
|
|
|
|
((double *)s2)[0] = r0;
|
|
((double *)s2)[1] = r1;
|
|
((double *)s2)[2] = r2;
|
|
((double *)s2) += 4;
|
|
((double *)s2)[-1] = r3;
|
|
|
|
#else
|
|
|
|
((double *)s2)[0] = ((double *)s1)[0];
|
|
((double *)s2)[1] = ((double *)s1)[1];
|
|
((double *)s2)[2] = ((double *)s1)[2];
|
|
((double *)s2)[3] = ((double *)s1)[3];
|
|
|
|
((double *)s2) += 4;
|
|
((double *)s1) += 4;
|
|
|
|
#endif
|
|
} while (--o);
|
|
}
|
|
else
|
|
#endif
|
|
do
|
|
{
|
|
#if USE_READ8WRITE8
|
|
|
|
register LTYPE r0,r1,r2,r3,r4,r5,r6,r7;
|
|
|
|
r0 = ((LTYPE *)s1)[0];
|
|
r1 = ((LTYPE *)s1)[1];
|
|
r2 = ((LTYPE *)s1)[2];
|
|
r3 = ((LTYPE *)s1)[3];
|
|
r4 = ((LTYPE *)s1)[4];
|
|
r5 = ((LTYPE *)s1)[5];
|
|
r6 = ((LTYPE *)s1)[6];
|
|
((LTYPE *)s1) += 8;
|
|
r7 = ((LTYPE *)s1)[-1];
|
|
|
|
((LTYPE *)s2)[0] = r0;
|
|
((LTYPE *)s2)[1] = r1;
|
|
((LTYPE *)s2)[2] = r2;
|
|
((LTYPE *)s2)[3] = r3;
|
|
((LTYPE *)s2)[4] = r4;
|
|
((LTYPE *)s2)[5] = r5;
|
|
((LTYPE *)s2)[6] = r6;
|
|
((LTYPE *)s2) += 8;
|
|
((LTYPE *)s2)[-1] = r7;
|
|
|
|
#else
|
|
|
|
//seems slower here: asm("dcbt 0,%0" : : "b" (s1));
|
|
|
|
((LTYPE *)s2)[0] = ((LTYPE *)s1)[0];
|
|
((LTYPE *)s2)[1] = ((LTYPE *)s1)[1];
|
|
((LTYPE *)s2)[2] = ((LTYPE *)s1)[2];
|
|
((LTYPE *)s2)[3] = ((LTYPE *)s1)[3];
|
|
|
|
((LTYPE *)s2)[4] = ((LTYPE *)s1)[4];
|
|
((LTYPE *)s2)[5] = ((LTYPE *)s1)[5];
|
|
((LTYPE *)s2)[6] = ((LTYPE *)s1)[6];
|
|
((LTYPE *)s2)[7] = ((LTYPE *)s1)[7];
|
|
|
|
((LTYPE *)s2) += 8;
|
|
((LTYPE *)s1) += 8;
|
|
|
|
#endif
|
|
|
|
} while (--o);
|
|
|
|
#else
|
|
|
|
((LTYPE *)s1)--; ((LTYPE *)s2)--;
|
|
do
|
|
{
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
*++((LTYPE *)s2) = *++((LTYPE *)s1);
|
|
|
|
} while (--o);
|
|
((LTYPE *)s1)++; ((LTYPE *)s2)++;
|
|
|
|
#endif
|
|
}
|
|
while (m--)
|
|
*((LTYPE *)s2)++ = *((LTYPE *)s1)++;
|
|
}
|
|
while (n--)
|
|
*((u8 *)s2)++ = *((u8 *)s1)++;
|
|
}
|
|
else
|
|
{
|
|
(u8 *)s1 += n;
|
|
(u8 *)s2 += n;
|
|
|
|
if (n >= LSIZE * 4)
|
|
{
|
|
m = (size_t) s1 & LMASK;
|
|
if (m && ((size_t) s2 & LMASK) == m)
|
|
{
|
|
n -= m;
|
|
do
|
|
*--((u8 *)s2) = *--((u8 *)s1);
|
|
while (--m);
|
|
}
|
|
m = n / LSIZE;
|
|
n &= LMASK;
|
|
if (m >= 8)
|
|
{
|
|
o = m / 8;
|
|
m &= 7;
|
|
|
|
#if USE_DOUBLECOPY
|
|
if (!((size_t) s1 & LMASK) && !((size_t) s2 & LMASK))
|
|
{
|
|
do
|
|
{
|
|
#if USE_READ8WRITE8
|
|
|
|
register double r0,r1,r2,r3;
|
|
|
|
r0 = ((double *)s1)[-1];
|
|
r1 = ((double *)s1)[-2];
|
|
r2 = ((double *)s1)[-3];
|
|
((double *)s1) -= 4;
|
|
r3 = ((double *)s1)[0];
|
|
|
|
((double *)s2)[-1] = r0;
|
|
((double *)s2)[-2] = r1;
|
|
((double *)s2)[-3] = r2;
|
|
((double *)s2) -= 4;
|
|
((double *)s2)[0] = r3;
|
|
|
|
#else
|
|
|
|
((double *)s2)[-1] = ((double *)s1)[-1];
|
|
((double *)s2)[-2] = ((double *)s1)[-2];
|
|
((double *)s2)[-3] = ((double *)s1)[-3];
|
|
((double *)s2)[-4] = ((double *)s1)[-4];
|
|
|
|
((double *)s2) -= 4;
|
|
((double *)s1) -= 4;
|
|
|
|
#endif
|
|
} while (--o);
|
|
}
|
|
else
|
|
#endif
|
|
do
|
|
{
|
|
#if USE_INDEX
|
|
|
|
#if USE_READ8WRITE8
|
|
|
|
register LTYPE r0,r1,r2,r3,r4,r5,r6,r7;
|
|
|
|
r0 = ((LTYPE *)s1)[-1];
|
|
r1 = ((LTYPE *)s1)[-2];
|
|
r2 = ((LTYPE *)s1)[-3];
|
|
r3 = ((LTYPE *)s1)[-4];
|
|
r4 = ((LTYPE *)s1)[-5];
|
|
r5 = ((LTYPE *)s1)[-6];
|
|
r6 = ((LTYPE *)s1)[-7];
|
|
((LTYPE *)s1) -= 8;
|
|
r7 = ((LTYPE *)s1)[0];
|
|
|
|
((LTYPE *)s2)[-1] = r0;
|
|
((LTYPE *)s2)[-2] = r1;
|
|
((LTYPE *)s2)[-3] = r2;
|
|
((LTYPE *)s2)[-4] = r3;
|
|
((LTYPE *)s2)[-5] = r4;
|
|
((LTYPE *)s2)[-6] = r5;
|
|
((LTYPE *)s2)[-7] = r6;
|
|
((LTYPE *)s2) -= 8;
|
|
((LTYPE *)s2)[0] = r7;
|
|
|
|
#else
|
|
|
|
//seems slower here: asm("dcbt 0,%0" : : "b" (((LTYPE *) s1) - 8));
|
|
|
|
((LTYPE *)s2)[-1] = ((LTYPE *)s1)[-1];
|
|
((LTYPE *)s2)[-2] = ((LTYPE *)s1)[-2];
|
|
((LTYPE *)s2)[-3] = ((LTYPE *)s1)[-3];
|
|
((LTYPE *)s2)[-4] = ((LTYPE *)s1)[-4];
|
|
|
|
((LTYPE *)s2)[-5] = ((LTYPE *)s1)[-5];
|
|
((LTYPE *)s2)[-6] = ((LTYPE *)s1)[-6];
|
|
((LTYPE *)s2)[-7] = ((LTYPE *)s1)[-7];
|
|
((LTYPE *)s2)[-8] = ((LTYPE *)s1)[-8];
|
|
|
|
((LTYPE *)s2) -= 8;
|
|
((LTYPE *)s1) -= 8;
|
|
|
|
#endif
|
|
|
|
#else
|
|
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
|
|
#endif
|
|
} while (--o);
|
|
}
|
|
while (m--)
|
|
*--((LTYPE *)s2) = *--((LTYPE *)s1);
|
|
}
|
|
while (n--)
|
|
*--((u8 *)s2) = *--((u8 *)s1);
|
|
}
|
|
}
|
|
|
|
#else
|
|
|
|
void bcopy(const void *s1, void *s2, size_t n)
|
|
{
|
|
size_t m;
|
|
if(!n)
|
|
return;
|
|
if(s2<s1)
|
|
{ if((long)s1&1)
|
|
{ *((char *)s2)++=*((char *)s1)++;
|
|
n--; }
|
|
if(!((long)s2&1))
|
|
{ m=n/sizeof(long);
|
|
n&=sizeof(long)-1;
|
|
for(;m;m--)
|
|
*((long *)s2)++=*((long *)s1)++;
|
|
}
|
|
for(;n;n--)
|
|
*((char *)s2)++=*((char *)s1)++;
|
|
}else
|
|
{ (char *)s1+=n;
|
|
(char *)s2+=n;
|
|
if((long)s1&1)
|
|
{ *--((char *)s2)=*--((char *)s1);
|
|
n--; }
|
|
if(!((long)s2&1))
|
|
{ m=n/sizeof(long);
|
|
n&=sizeof(long)-1;
|
|
for(;m;m--)
|
|
*--((long *)s2)=*--((long *)s1);
|
|
}
|
|
for(;n;n--)
|
|
*--((char *)s2)=*--((char *)s1);
|
|
}
|
|
}
|
|
|
|
#endif
|
|
#else
|
|
#include "defs.h"
|
|
|
|
/*
|
|
* This is probably not the best we can do, but it is still 2-10 times
|
|
* faster than the C version in the portable gen directory.
|
|
*
|
|
* Things that might help:
|
|
* - unroll the longword copy loop (might not be good for a 68020)
|
|
* - longword align when possible (only on the 68020)
|
|
* - use nested DBcc instructions or use one and limit size to 64K
|
|
*/
|
|
|
|
|
|
|
|
ENTRY(bcopy)
|
|
asm(" \n\
|
|
movl sp@(12),d1 /* check count */ \n\
|
|
jle bcdone_bcopy /* <= 0, don't do anything */ \n\
|
|
movl sp@(4),a0 /* src address */ \n\
|
|
movl sp@(8),a1 /* dest address */ \n\
|
|
cmpl a1,a0 /* src after dest? */ \n\
|
|
jlt bcback /* yes, must copy backwards */ \n\
|
|
movl a0,d0 \n\
|
|
btst #0,d0 /* src address odd? */ \n\
|
|
jeq bcfeven /* no, skip alignment */ \n\
|
|
movb a0@+,a1@+ /* yes, copy a byte */ \n\
|
|
subql #1,d1 /* adjust count */ \n\
|
|
jeq bcdone_bcopy /* count 0, all done */ \n\
|
|
bcfeven: \n\
|
|
movl a1,d0 \n\
|
|
btst #0,d0 /* dest address odd? */ \n\
|
|
jne bcfbloop /* yes, no hope for alignment, copy bytes */ \n\
|
|
movl d1,d0 /* no, both even */ \n\
|
|
lsrl #2,d0 /* convert count to longword count */ \n\
|
|
jeq bcfbloop /* count 0, skip longword loop */ \n\
|
|
bcflloop: \n\
|
|
movl a0@+,a1@+ /* copy a longword */ \n\
|
|
subql #1,d0 /* adjust count */ \n\
|
|
jne bcflloop /* still more, keep copying */ \n\
|
|
andl #3,d1 /* what remains */ \n\
|
|
jeq bcdone_bcopy /* nothing, all done */ \n\
|
|
bcfbloop: \n\
|
|
movb a0@+,a1@+ /* copy a byte */ \n\
|
|
subql #1,d1 /* adjust count */ \n\
|
|
jne bcfbloop /* still more, keep going */ \n\
|
|
bcdone_bcopy: \n\
|
|
rts \n\
|
|
bcback: \n\
|
|
addl d1,a0 /* src pointer to end */ \n\
|
|
addl d1,a1 /* dest pointer to end */ \n\
|
|
movl a0,d0 \n\
|
|
btst #0,d0 /* src address odd? */ \n\
|
|
jeq bcbeven /* no, skip alignment */ \n\
|
|
movb a0@-,a1@- /* yes, copy a byte */ \n\
|
|
subql #1,d1 /* adjust count */ \n\
|
|
jeq bcdone_bcopy /* count 0, all done */ \n\
|
|
bcbeven: \n\
|
|
movl a1,d0 \n\
|
|
btst #0,d0 /* dest address odd? */ \n\
|
|
jne bcbbloop /* yes, no hope for alignment, copy bytes */ \n\
|
|
movl d1,d0 /* no, both even */ \n\
|
|
lsrl #2,d0 /* convert count to longword count */ \n\
|
|
jeq bcbbloop /* count 0, skip longword loop */ \n\
|
|
bcblloop: \n\
|
|
movl a0@-,a1@- /* copy a longword */ \n\
|
|
subql #1,d0 /* adjust count */ \n\
|
|
jne bcblloop /* still more, keep copying */ \n\
|
|
andl #3,d1 /* what remains */ \n\
|
|
jeq bcdone_bcopy /* nothing, all done */ \n\
|
|
bcbbloop: \n\
|
|
movb a0@-,a1@- /* copy a byte */ \n\
|
|
subql #1,d1 /* adjust count */ \n\
|
|
jne bcbbloop /* still more, keep going */ \n\
|
|
rts \n\
|
|
");
|
|
|
|
#endif
|