#define BDNZ    BC      16,0,
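/*
 * BDNZ: decrement CTR and branch if the result is non-zero
 * (the BO=16, BI=0 form of the conditional branch)
 */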
        TEXT    memmove(SB), $0
        BR      move

        TEXT    memcpy(SB), $0
move:
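/* memmove and memcpy share the code below */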

/*
 * performance:
 * (tba)
 */

        MOVW    R3, s1+0(FP)
        MOVW    n+8(FP), R9             /* R9 is count */
        MOVW    R3, R10                 /* R10 is to-pointer */
        CMP     R9, $0
        BEQ     ret
        BLT     trap
        MOVW    s2+4(FP), R11           /* R11 is from-pointer */

/*
 * if no more than 16 bytes, just use one lsw/stsw
 */
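/*
 * (LSW/STSW move a string of bytes between memory and successive
 * registers; the byte count comes from XER or an immediate operand)
 */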
        CMP     R9, $16
        BLE     fout

        ADD     R9,R11, R13             /* R13 is end from-pointer */
        ADD     R9,R10, R12             /* R12 is end to-pointer */

/*
 * easiest test: copy backwards if the
 * destination has the higher memory address
 */
        CMPU    R10, R11
        BGT     back

/*
 * test whether the two pointers
 * have the same word alignment
 */
        XOR     R10,R11, R7
        ANDCC   $3,R7
        BNE     fbad

/*
 * move a few bytes to align pointers
 */
        ANDCC   $3,R10,R7
        BEQ     f2
        SUBC    R7, $4, R7              /* R7 = 4 - (R10 & 3): bytes to word alignment */
        SUB     R7, R9
        MOVW    R7, XER
        LSW     (R11), R16
        ADD     R7, R11
        STSW    R16, (R10)
        ADD     R7, R10

/*
 * turn R14 into a count of 16-byte blocks
 * copy 16 bytes at a time while there's room.
 */
f2:
        SRAWCC  $4, R9, R14
        BLE     fout
        MOVW    R14, CTR
        SUB     $4, R11
        SUB     $4, R10
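/*
 * MOVWU updates its pointer register by the displacement,
 * so the pointers were just backed up by 4; they are
 * advanced again after the loop
 */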
f3:
        MOVWU   4(R11), R16
        MOVWU   4(R11), R17
        MOVWU   4(R11), R18
        MOVWU   4(R11), R19
        MOVWU   R16, 4(R10)
        MOVWU   R17, 4(R10)
        MOVWU   R18, 4(R10)
        MOVWU   R19, 4(R10)
        BDNZ    f3
        RLWNMCC $0, R9, $15, R9 /* residue */
        BEQ     ret
        ADD     $4, R11
        ADD     $4, R10

/*
 * move up to 16 bytes through R16 .. R19; aligned and unaligned
 */
fout:
        MOVW    R9, XER
        LSW     (R11), R16
        STSW    R16, (R10)
        BR      ret

/*
 * loop for unaligned copy, then copy up to 15 remaining bytes
 */
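/*
 * (the string moves in this loop use an immediate count
 * of 16 bytes rather than a count in XER)
 */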
fbad:
        SRAWCC  $4, R9, R14
        BLE     f6
        MOVW    R14, CTR
f5:
        LSW     (R11), $16, R16
        ADD     $16, R11
        STSW    R16, $16, (R10)
        ADD     $16, R10
        BDNZ    f5
        RLWNMCC $0, R9, $15, R9 /* residue */
        BEQ     ret
f6:
        MOVW    R9, XER
        LSW     (R11), R16
        STSW    R16, (R10)
        BR      ret

/*
 * whole thing repeated for backwards
 */
back:
        CMP     R9, $4
        BLT     bout

        XOR     R12,R13, R7
        ANDCC   $3,R7
        BNE     bout
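/*
 * byte at a time until the end pointers are word aligned
 */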
b1:
        ANDCC   $3,R13, R7
        BEQ     b2
        MOVBZU  -1(R13), R16
        MOVBZU  R16, -1(R12)
        SUB     $1, R9
        BR      b1
b2:
        SRAWCC  $4, R9, R14
        BLE     b4
        MOVW    R14, CTR
b3:
        MOVWU   -4(R13), R16
        MOVWU   -4(R13), R17
        MOVWU   -4(R13), R18
        MOVWU   -4(R13), R19
        MOVWU   R16, -4(R12)
        MOVWU   R17, -4(R12)
        MOVWU   R18, -4(R12)
        MOVWU   R19, -4(R12)
        BDNZ    b3
        RLWNMCC $0, R9, $15, R9 /* residue */
        BEQ     ret
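/*
 * copy remaining whole words one at a time
 */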
b4:
        SRAWCC  $2, R9, R14
        BLE     bout
        MOVW    R14, CTR
b5:
        MOVWU   -4(R13), R16
        MOVWU   R16, -4(R12)
        BDNZ    b5
        RLWNMCC $0, R9, $3, R9  /* residue */
        BEQ     ret

bout:
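/*
 * move any remaining bytes backwards, byte at a time
 */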
        CMPU    R13, R11
        BLE     ret
        MOVBZU  -1(R13), R16
        MOVBZU  R16, -1(R12)
        BR      bout

trap:
        MOVW    $0, R0
        MOVW    0(R0), R0               /* negative count: load from address 0 to fault */

ret:
        MOVW    s1+0(FP), R3
        RETURN
