arch/xtensa/lib/usercopy.S

   1 /*
   2  *  arch/xtensa/lib/usercopy.S
   3  *
   4  *  Copy to/from user space (derived from arch/xtensa/lib/hal/memcopy.S)
   5  *
   6  *  DO NOT COMBINE this function with <arch/xtensa/lib/hal/memcopy.S>.
   7  *  It needs to remain separate and distinct.  The hal files are part
   8  *  of the Xtensa link-time HAL, and those files may differ per
   9  *  processor configuration.  Patching the kernel for another
  10  *  processor configuration includes replacing the hal files, and we
  11  *  could lose the special functionality for accessing user-space
  12  *  memory during such a patch.  We sacrifice a little code space here
  13  *  in favor of simpler code maintenance.
  14  *
  15  *  This file is subject to the terms and conditions of the GNU General
  16  *  Public License.  See the file "COPYING" in the main directory of
  17  *  this archive for more details.
  18  *
  19  *  Copyright (C) 2002 Tensilica Inc.
  20  */
  21
  22
  23 /*
  24  * size_t __xtensa_copy_user (void *dst, const void *src, size_t len);
  25  *
  26  * The returned value is the number of bytes not copied.  Implies zero
  27  * is success.
  28  *
  29  * The general case algorithm is as follows:
  30  *   If the destination and source are both aligned,
  31  *     do 16B chunks with a loop, and then finish up with
  32  *     8B, 4B, 2B, and 1B copies conditional on the length.
  33  *   If destination is aligned and source unaligned,
  34  *     do the same, but use SRC to align the source data.
  35  *   If destination is unaligned, align it by conditionally
  36  *     copying 1B and 2B and then retest.
  37  *   This code tries to use fall-through branches for the common
  38  *     case of aligned destinations (except for the branches to
  39  *     the alignment label).
  40  *
  41  * Register use:
  42  *      a0/ return address
  43  *      a1/ stack pointer
  44  *      a2/ original dst (read by fixup), then return value
  45  *      a3/ src
  46  *      a4/ length
  47  *      a5/ dst
  48  *      a6/ tmp
  49  *      a7/ tmp
  50  *      a8/ tmp
  51  *      a9/ tmp
  52  *      a10/ tmp
  53  *      a11/ original length; a12/ tmp (loop end address, !XCHAL_HAVE_LOOPS only)
  54  */
  55
  56 #include <variant/core.h>
  57
  58 #ifdef __XTENSA_EB__   /* big-endian build: ALIGN passes W0, W1 to SRC in that order */
  59 #define ALIGN(R, W0, W1) src    R, W0, W1
  60 #define SSA8(R) ssa8b R
  61 #else                  /* otherwise: SRC source operands swapped, ssa8l sets the shift amount */
  62 #define ALIGN(R, W0, W1) src    R, W1, W0
  63 #define SSA8(R) ssa8l R
  64 #endif /* __XTENSA_EB__ */
  65
  66 /* Load or store instructions that may cause exceptions use the EX macro.  It emits
  67  * "insn reg1, reg2, offset" at local label 9 and adds the word pair (9b, handler) to __ex_table. */
  68 #define EX(insn,reg1,reg2,offset,handler)       \
  69 9:      insn    reg1, reg2, offset;             \
  70         .section __ex_table, "a";               \
  71         .word   9b, handler;                    \
  72         .previous
  73
  74
  75         .text
  76         .align  4
  77         .global __xtensa_copy_user
  78         .type   __xtensa_copy_user,@function
  79 __xtensa_copy_user:
  80         entry   sp, 16          # minimal stack frame
  81         # in: a2 = dst, a3 = src, a4 = len
  82         mov     a11, a4         # a11 = original len, read again only by fixup
  83         mov     a5, a2          # a5 = working dst; a2 keeps the original dst
  84 .Lcommon:
  85         bbsi.l  a5, 0, .Ldst1mod2 # dst is odd (a5 == a2 here)
  86         bbsi.l  a5, 1, .Ldst2mod4 # dst is 2 mod 4
  87 .Ldstaligned:   # dst is word-aligned here; re-entered after dst alignment
  88         movi    a8, 3           # a8 = mask of the two low address bits
  89                                 # (.Lsrcunaligned uses it again)
  90         srli    a7, a4, 4       # a7 = number of 16B loop iterations
  91         bnone   a3, a8, .Laligned # src word-aligned as well: word copy
  92         SSA8(   a3)             # shift amount = byte offset of src
  93         bnez    a4, .Lsrcunaligned
  94         movi    a2, 0           # len == 0: nothing to copy, success
  95         retw
  96
  97 /*
  98  * Destination is unaligned: copy 1 and/or 2 leading bytes until dst is
  99  * word-aligned and rejoin .Ldstaligned, or byte-copy everything when the
 100  * remaining length is below the bltui thresholds. */
 101 .Ldst1mod2:     # dst is only byte aligned
 102         bltui   a4, 7, .Lbytecopy       # do short copies byte by byte
 103
 104         # copy 1 byte
 105         EX(l8ui, a6, a3, 0, fixup)
 106         addi    a3, a3,  1
 107         EX(s8i, a6, a5,  0, fixup)
 108         addi    a5, a5,  1              # dst advances only after the store completed
 109         addi    a4, a4, -1              # one byte less to copy
 110         bbci.l  a5, 1, .Ldstaligned     # if dst is now aligned, then
 111                                         # return to main algorithm
 112 .Ldst2mod4:     # dst 16-bit aligned
 113         # copy 2 bytes
 114         bltui   a4, 6, .Lbytecopy       # do short copies byte by byte
 115         EX(l8ui, a6, a3, 0, fixup)
 116         EX(l8ui, a7, a3, 1, fixup)
 117         addi    a3, a3,  2
 118         EX(s8i, a6, a5,  0, fixup)
 119         EX(s8i, a7, a5,  1, fixup)
 120         addi    a5, a5,  2              # dst advances only after both stores completed
 121         addi    a4, a4, -2              # two bytes less to copy
 122         j       .Ldstaligned    # dst is now aligned, return to main algorithm
 123
 124 /*
 125  * Byte by byte copy of the remaining a4 bytes from a3 (src) to a5 (dst).
 126  */
 127         .align  4
 128         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 129                                 # (0 mod 4 alignment for LBEG)
 130 .Lbytecopy:
 131 #if XCHAL_HAVE_LOOPS
 132         loopnez a4, .Lbytecopydone      # loop a4 times; body skipped when a4 == 0
 133 #else /* !XCHAL_HAVE_LOOPS */
 134         beqz    a4, .Lbytecopydone
 135         add     a7, a3, a4      # a7 = end address for source
 136 #endif /* !XCHAL_HAVE_LOOPS */
 137 .Lnextbyte:
 138         EX(l8ui, a6, a3, 0, fixup)
 139         addi    a3, a3, 1
 140         EX(s8i, a6, a5, 0, fixup)
 141         addi    a5, a5, 1       # dst advances only after the store completed
 142 #if !XCHAL_HAVE_LOOPS
 143         bne     a3, a7, .Lnextbyte      # until src == a7 (exact hit; no signed address compare)
 144 #endif /* !XCHAL_HAVE_LOOPS */
 145 .Lbytecopydone:
 146         movi    a2, 0           # return success for len bytes copied
 147         retw
 148
 149 /*
 150  * Destination and source are word-aligned.  Entered with a7 = a4 / 16;
 151  * bits 3..0 of a4 then select the 8B, 4B, 2B and 1B tail copies. */
 152         # copy 16 bytes per iteration for word-aligned dst and word-aligned src
 153         .align  4               # 1 mod 4 alignment for LOOPNEZ
 154         .byte   0               # (0 mod 4 alignment for LBEG)
 155 .Laligned:
 156 #if XCHAL_HAVE_LOOPS
 157         loopnez a7, .Loop1done
 158 #else /* !XCHAL_HAVE_LOOPS */
 159         beqz    a7, .Loop1done
 160         slli    a8, a7, 4       # a8 = number of bytes the 16B loop will copy
 161         add     a8, a8, a3      # a8 = end of last 16B source chunk
 162 #endif /* !XCHAL_HAVE_LOOPS */
 163 .Loop1:
 164         EX(l32i, a6, a3,  0, fixup)
 165         EX(l32i, a7, a3,  4, fixup)
 166         EX(s32i, a6, a5,  0, fixup)
 167         EX(l32i, a6, a3,  8, fixup)
 168         EX(s32i, a7, a5,  4, fixup)
 169         EX(l32i, a7, a3, 12, fixup)
 170         EX(s32i, a6, a5,  8, fixup)
 171         addi    a3, a3, 16
 172         EX(s32i, a7, a5, 12, fixup)
 173         addi    a5, a5, 16      # dst advances only after all four stores completed
 174 #if !XCHAL_HAVE_LOOPS
 175         bne     a3, a8, .Loop1  # until src == a8 (exact hit; no signed address compare)
 176 #endif /* !XCHAL_HAVE_LOOPS */
 177 .Loop1done:
 178         bbci.l  a4, 3, .L2      # skip unless bit 3 of len is set
 179         # copy 8 bytes
 180         EX(l32i, a6, a3,  0, fixup)
 181         EX(l32i, a7, a3,  4, fixup)
 182         addi    a3, a3,  8
 183         EX(s32i, a6, a5,  0, fixup)
 184         EX(s32i, a7, a5,  4, fixup)
 185         addi    a5, a5,  8
 186 .L2:
 187         bbci.l  a4, 2, .L3      # skip unless bit 2 of len is set
 188         # copy 4 bytes
 189         EX(l32i, a6, a3,  0, fixup)
 190         addi    a3, a3,  4
 191         EX(s32i, a6, a5,  0, fixup)
 192         addi    a5, a5,  4
 193 .L3:
 194         bbci.l  a4, 1, .L4      # skip unless bit 1 of len is set
 195         # copy 2 bytes
 196         EX(l16ui, a6, a3,  0, fixup)
 197         addi    a3, a3,  2
 198         EX(s16i,  a6, a5,  0, fixup)
 199         addi    a5, a5,  2
 200 .L4:
 201         bbci.l  a4, 0, .L5      # skip unless bit 0 of len is set
 202         # copy 1 byte
 203         EX(l8ui, a6, a3,  0, fixup)
 204         EX(s8i,  a6, a5,  0, fixup)
 205 .L5:
 206         movi    a2, 0           # return success for len bytes copied
 207         retw
 208
 209 /*
 210  * Destination is aligned, source is unaligned.  Entered with a7 = a4 / 16,
 211  * a8 = 3 and the shift amount already set by SSA8(a3); every dst word is
 212  * built by ALIGN() from two consecutive aligned source words. */
 213         .align  4
 214         .byte   0               # 1 mod 4 alignment for LOOPNEZ
 215                                 # (0 mod 4 alignment for LBEG)
 216 .Lsrcunaligned:
 217         # copy 16 bytes per iteration for word-aligned dst and unaligned src
 218         and     a10, a3, a8     # save unalignment offset for below
 219         sub     a3, a3, a10     # align a3 (to avoid sim warnings only; not needed for hardware)
 220         EX(l32i, a6, a3, 0, fixup)      # load first word
 221 #if XCHAL_HAVE_LOOPS
 222         loopnez a7, .Loop2done
 223 #else /* !XCHAL_HAVE_LOOPS */
 224         beqz    a7, .Loop2done
 225         slli    a12, a7, 4      # a12 = number of bytes the 16B loop will copy
 226         add     a12, a12, a3    # a12 = end of last 16B source chunk
 227 #endif /* !XCHAL_HAVE_LOOPS */
 228 .Loop2:                         # on entry a6 = source word at a3 + 0
 229         EX(l32i, a7, a3,  4, fixup)
 230         EX(l32i, a8, a3,  8, fixup)
 231         ALIGN(  a6, a6, a7)
 232         EX(s32i, a6, a5,  0, fixup)
 233         EX(l32i, a9, a3, 12, fixup)
 234         ALIGN(  a7, a7, a8)
 235         EX(s32i, a7, a5,  4, fixup)
 236         EX(l32i, a6, a3, 16, fixup)     # word carried into the next iteration / the tail
 237         ALIGN(  a8, a8, a9)
 238         EX(s32i, a8, a5,  8, fixup)
 239         addi    a3, a3, 16
 240         ALIGN(  a9, a9, a6)
 241         EX(s32i, a9, a5, 12, fixup)
 242         addi    a5, a5, 16      # dst advances only after all four stores completed
 243 #if !XCHAL_HAVE_LOOPS
 244         bne     a3, a12, .Loop2 # until src == a12 (exact hit; no signed address compare)
 245 #endif /* !XCHAL_HAVE_LOOPS */
 246 .Loop2done:
 247         bbci.l  a4, 3, .L12     # skip unless bit 3 of len is set
 248         # copy 8 bytes
 249         EX(l32i, a7, a3,  4, fixup)
 250         EX(l32i, a8, a3,  8, fixup)
 251         ALIGN(  a6, a6, a7)
 252         EX(s32i, a6, a5,  0, fixup)
 253         addi    a3, a3,  8
 254         ALIGN(  a7, a7, a8)
 255         EX(s32i, a7, a5,  4, fixup)
 256         addi    a5, a5,  8
 257         mov     a6, a8          # carry the last loaded word into a6
 258 .L12:
 259         bbci.l  a4, 2, .L13     # skip unless bit 2 of len is set
 260         # copy 4 bytes
 261         EX(l32i, a7, a3,  4, fixup)
 262         addi    a3, a3,  4
 263         ALIGN(  a6, a6, a7)
 264         EX(s32i, a6, a5,  0, fixup)
 265         addi    a5, a5,  4
 266         mov     a6, a7          # carry the last loaded word into a6
 267 .L13:
 268         add     a3, a3, a10     # readjust a3 with correct misalignment
 269         bbci.l  a4, 1, .L14     # skip unless bit 1 of len is set
 270         # copy 2 bytes
 271         EX(l8ui, a6, a3,  0, fixup)
 272         EX(l8ui, a7, a3,  1, fixup)
 273         addi    a3, a3,  2
 274         EX(s8i, a6, a5,  0, fixup)
 275         EX(s8i, a7, a5,  1, fixup)
 276         addi    a5, a5,  2
 277 .L14:
 278         bbci.l  a4, 0, .L15     # skip unless bit 0 of len is set
 279         # copy 1 byte
 280         EX(l8ui, a6, a3,  0, fixup)
 281         EX(s8i,  a6, a5,  0, fixup)
 282 .L15:
 283         movi    a2, 0           # return success for len bytes copied
 284         retw
 285
 286
 287         .section .fixup, "ax"
 288         .align  4
 289
 290 /* Handler named by every EX() use in this file.
 291  * On entry: a2 = original dst; a5 = current dst; a11 = original len
 292  * bytes_copied = a5 - a2
 293  * retval = bytes_not_copied = a11 - bytes_copied = (a2 - a5) + a11
 294  */
 295
 296
 297 fixup:
 298         sub     a2, a2, a5      /* a2 <-- -(bytes copied) */
 299         add     a2, a2, a11     /* a2 <-- original len - bytes copied */
 300         retw