arch/ia64/lib/idiv32.S

   1 /*
   2  * Copyright (C) 2000 Hewlett-Packard Co
   3  * Copyright (C) 2000 David Mosberger-Tang <davidm@hpl.hp.com>
   4  *
   5  * 32-bit integer division.
   6  *
   7  * This code is based on the application note entitled "Divide, Square Root
   8  * and Remainder Algorithms for the IA-64 Architecture".  This document
   9  * is available as Intel document number 248725-002 or via the web at
  10  * http://developer.intel.com/software/opensource/numerics/
  11  *
  12  * For more details on the theory behind these algorithms, see "IA-64
  13  * and Elementary Functions" by Peter Markstein; HP Professional Books
  14  * (http://www.goodreads.com/book/show/2019887.Ia_64_and_Elementary_Functions)
  15  */
  16
  17 #include <asm/asmmacro.h>
  18 #include <asm/export.h>
  19
  20 #ifdef MODULO              /* MODULO defined: build the remainder routine */
  21 # define OP     mod
  22 #else                      /* otherwise: build the quotient routine */
  23 # define OP     div
  24 #endif
  25
     /*
      * Per-signedness building blocks used by the routine body:
      *   SGN       - "u" infix of the routine name for the unsigned flavour,
      *               empty for the signed one
      *   EXTEND    - instruction that widens a 32-bit argument to the full
      *               register (zxt4 zero-extends, sxt4 sign-extends)
      *   INT_TO_FP - convert the integer held in FP register b to a
      *               floating-point value in FP register a
      *   FP_TO_INT - convert the floating-point value in b back to an
      *               integer in a, truncating
      */
  26 #ifdef UNSIGNED            /* UNSIGNED defined: operands are unsigned */
  27 # define SGN    u
  28 # define EXTEND zxt4
  29 # define INT_TO_FP(a,b) fcvt.xuf.s1 a=b
  30 # define FP_TO_INT(a,b) fcvt.fxu.trunc.s1 a=b
  31 #else                      /* otherwise: operands are signed */
  32 # define SGN
  33 # define EXTEND sxt4
  34 # define INT_TO_FP(a,b) fcvt.xf a=b
  35 # define FP_TO_INT(a,b) fcvt.fx.trunc.s1 a=b
  36 #endif
  37
     /*
      * PASTE goes through PASTE1 so that its arguments (SGN, OP) are
      * macro-expanded before ## glues them together.  NAME therefore expands
      * to "__" SGN OP "si3": __divsi3, __modsi3, __udivsi3 or __umodsi3.
      */
  38 #define PASTE1(a,b)     a##b
  39 #define PASTE(a,b)      PASTE1(a,b)
  40 #define NAME            PASTE(PASTE(__,SGN),PASTE(OP,si3))
  41
  42 GLOBAL_ENTRY(NAME)
             /*
              * NAME(a, b): 32-bit integer a / b, or a % b when MODULO is
              * defined; signed unless UNSIGNED is defined.
              *
              * In:   in0 = a, in1 = b (each is widened in place from its low
              *       32 bits by EXTEND)
              * Out:  r8 = quotient (remainder when MODULO is defined)
              * Uses: r2, f6-f9, p6; in0 and in1 are overwritten
              *
              * The quotient is computed in floating point: frcpa supplies an
              * initial reciprocal approximation, the predicated fmpy/fnma/fma
              * steps refine it, and the result is truncated back to an integer.
              * There is no explicit test for b == 0 in this routine.
              */
  43         .regstk 2,0,0,0                 // 2 input regs (in0, in1); no locals, outputs or rotating regs
  44         // Transfer inputs to FP registers.
  45         mov r2 = 0xffdd                 // r2 = 65535 - 34: biased exponent for 2^-34 (fp reg format bias is 65535)
  46         EXTEND in0 = in0                // in0 = a, widened from 32 bits
  47         EXTEND in1 = in1                // in1 = b, widened from 32 bits
  48         ;;
  49         setf.sig f8 = in0               // f8 significand = a (still an integer)
  50         setf.sig f9 = in1               // f9 significand = b (still an integer)
  51 #ifdef MODULO
  52         sub in1 = r0, in1               // in1 = -b, kept for the final r = q*(-b) + a
  53 #endif
  54         ;;
  55         // Convert the inputs to FP, to avoid FP software-assist faults.
  56         INT_TO_FP(f8, f8)               // f8 = a as a floating-point value
  57         INT_TO_FP(f9, f9)               // f9 = b as a floating-point value
  58         ;;
  59         setf.exp f7 = r2                // f7 = 2^-34
             // p6 predicates every refinement step below.  When p6 is clear they
             // are all skipped and f6, exactly as written by frcpa, is what gets
             // converted to the integer result (frcpa's special-case output; see
             // the frcpa description in the architecture manual).
  60         frcpa.s1 f6, p6 = f8, f9        // y0 = frcpa(b), approximately 1/b
  61         ;;
  62 (p6)    fmpy.s1 f8 = f8, f6             // q0 = a*y0
  63 (p6)    fnma.s1 f6 = f9, f6, f1         // e0 = -b*y0 + 1
  64         ;;
  65 #ifdef MODULO
             // f9 (b) was last read by the fnma in the previous instruction
             // group, so the register can be reused for -b from here on.
  66         setf.sig f9 = in1               // f9 = -b
  67 #endif
  68 (p6)    fma.s1 f8 = f6, f8, f8          // q1 = e0*q0 + q0
             // NOTE(review): the 2^-34 addend presumably nudges q2 so that the
             // truncation below lands on the exact quotient -- confirm against
             // the application note cited in the file header.
  69 (p6)    fma.s1 f6 = f6, f6, f7          // e1 = e0*e0 + 2^-34
  70         ;;
  71 #ifdef MODULO
             // f7 (2^-34) was last read by the fma in the previous instruction
             // group, so the register can be reused for the dividend.
  72         setf.sig f7 = in0               // f7 = a (integer, in the significand)
  73 #endif
  74 (p6)    fma.s1 f6 = f6, f8, f8          // q2 = e1*q1 + q1
  75         ;;
  76         FP_TO_INT(f6, f6)               // q = trunc(q2)
  77         ;;
  78 #ifdef MODULO
  79         xma.l f6 = f6, f9, f7           // r = q*(-b) + a (integer multiply-add on the significands)
  80         ;;
  81 #endif
  82         getf.sig r8 = f6                // transfer result to result register
  83         br.ret.sptk.many rp
  84 END(NAME)
  85 EXPORT_SYMBOL(NAME)