arch/arm/vfp/vfpsingle.c

   1 /*
   2  *  linux/arch/arm/vfp/vfpsingle.c
   3  *
   4  * This code is derived in part from John R. Hauser's SoftFloat library, which
   5  * carries the following notice:
   6  *
   7  * ===========================================================================
   8  * This C source file is part of the SoftFloat IEC/IEEE Floating-point
   9  * Arithmetic Package, Release 2.
  10  *
  11  * Written by John R. Hauser.  This work was made possible in part by the
  12  * International Computer Science Institute, located at Suite 600, 1947 Center
  13  * Street, Berkeley, California 94704.  Funding was partially provided by the
  14  * National Science Foundation under grant MIP-9311980.  The original version
  15  * of this code was written as part of a project to build a fixed-point vector
  16  * processor in collaboration with the University of California at Berkeley,
  17  * overseen by Profs. Nelson Morgan and John Wawrzynek.  More information
  18  * is available through the web page `http://HTTP.CS.Berkeley.EDU/~jhauser/
  19  * arithmetic/softfloat.html'.
  20  *
  21  * THIS SOFTWARE IS DISTRIBUTED AS IS, FOR FREE.  Although reasonable effort
  22  * has been made to avoid it, THIS SOFTWARE MAY CONTAIN FAULTS THAT WILL AT
  23  * TIMES RESULT IN INCORRECT BEHAVIOR.  USE OF THIS SOFTWARE IS RESTRICTED TO
  24  * PERSONS AND ORGANIZATIONS WHO CAN AND WILL TAKE FULL RESPONSIBILITY FOR ANY
  25  * AND ALL LOSSES, COSTS, OR OTHER PROBLEMS ARISING FROM ITS USE.
  26  *
  27  * Derivative works are acceptable, even for commercial purposes, so long as
  28  * (1) they include prominent notice that the work is derivative, and (2) they
  29  * include prominent notice akin to these three paragraphs for those parts of
  30  * this code that are retained.
  31  * ===========================================================================
  32  */
  33 #include <linux/kernel.h>
  34 #include <linux/bitops.h>
  35
  36 #include <asm/div64.h>
  37 #include <asm/ptrace.h>
  38 #include <asm/vfp.h>
  39
  40 #include "vfpinstr.h"
  41 #include "vfp.h"
  42
  43 static struct vfp_single vfp_single_default_qnan = {
  44         .exponent       = 255,
  45         .sign           = 0,
  46         .significand    = VFP_SINGLE_SIGNIFICAND_QNAN,
  47 };
  48
  49 static void vfp_single_dump(const char *str, struct vfp_single *s)
  50 {
  51         pr_debug("VFP: %s: sign=%d exponent=%d significand=%08x\n",
  52                  str, s->sign != 0, s->exponent, s->significand);
  53 }
  54
  55 static void vfp_single_normalise_denormal(struct vfp_single *vs)
  56 {
  57         int bits = 31 - fls(vs->significand);
  58
  59         vfp_single_dump("normalise_denormal: in", vs);
  60
  61         if (bits) {
  62                 vs->exponent -= bits - 1;
  63                 vs->significand <<= bits;
  64         }
  65
  66         vfp_single_dump("normalise_denormal: out", vs);
  67 }
  68
  /*
   * Normalise, round and pack the unpacked result 'vs', then write the packed
   * value to single-precision register 'sd' with vfp_put_float().
   *
   *  sd         - destination register number
   *  vs         - unpacked result; modified in place
   *  fpscr      - only the rounding-mode field (FPSCR_RMODE_MASK) is read
   *  exceptions - exception flags accumulated so far by the caller
   *  func       - operation name, used only in the DEBUG trace
   *
   * Returns 'exceptions' with FPSCR_IXC, FPSCR_OFC and/or FPSCR_UFC added as
   * detected here.
   *
   * Without DEBUG the real function is __vfp_single_normaliseround() and the
   * vfp_single_normaliseround() macro drops the 'func' argument.
   */
  #ifndef DEBUG
  #define vfp_single_normaliseround(sd,vsd,fpscr,except,func) __vfp_single_normaliseround(sd,vsd,fpscr,except)
  u32 __vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions)
  #else
  u32 vfp_single_normaliseround(int sd, struct vfp_single *vs, u32 fpscr, u32 exceptions, const char *func)
  #endif
  {
          u32 significand, incr, rmode;
          int exponent, shift, underflow;

          vfp_single_dump("pack: in", vs);

          /*
           * Infinities and NaNs are a special case.
           *
           * An exponent of 255 is packed untouched when the significand is
           * zero, or when the caller has already recorded an exception.
           */
          if (vs->exponent == 255 && (vs->significand == 0 || exceptions))
                  goto pack;

          /*
           * Special-case zero.
           */
          if (vs->significand == 0) {
                  vs->exponent = 0;
                  goto pack;
          }

          exponent = vs->exponent;
          significand = vs->significand;

          /*
           * Normalise first.  Note that we shift the significand up to
           * bit 31, so we have VFP_SINGLE_LOW_BITS + 1 below the least
           * significant bit.
           */
          shift = 32 - fls(significand);
          if (shift < 32 && shift) {
                  exponent -= shift;
                  significand <<= shift;
          }

  #ifdef DEBUG
          vs->exponent = exponent;
          vs->significand = significand;
          vfp_single_dump("pack: normalised", vs);
  #endif

          /*
           * Tiny number?
           *
           * A negative exponent is brought back to zero by shifting the
           * significand right with vfp_shiftright32jamming().  The underflow
           * candidate is dropped again if none of the bits that will be
           * discarded are set after that shift.
           */
          underflow = exponent < 0;
          if (underflow) {
                  significand = vfp_shiftright32jamming(significand, -exponent);
                  exponent = 0;
  #ifdef DEBUG
                  vs->exponent = exponent;
                  vs->significand = significand;
                  vfp_single_dump("pack: tiny number", vs);
  #endif
                  if (!(significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1)))
                          underflow = 0;
          }

          /*
           * Select rounding increment.
           *
           * Nearest: half of the final LSB, less one when the final LSB is
           * clear so that an exact tie does not carry (ties go to even).
           * To-zero: nothing is added.
           * Directed modes: all discarded bits set, but only when rounding
           * away from zero for this sign; otherwise incr stays 0.
           */
          incr = 0;
          rmode = fpscr & FPSCR_RMODE_MASK;

          if (rmode == FPSCR_ROUND_NEAREST) {
                  incr = 1 << VFP_SINGLE_LOW_BITS;
                  if ((significand & (1 << (VFP_SINGLE_LOW_BITS + 1))) == 0)
                          incr -= 1;
          } else if (rmode == FPSCR_ROUND_TOZERO) {
                  incr = 0;
          } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vs->sign != 0))
                  incr = (1 << (VFP_SINGLE_LOW_BITS + 1)) - 1;

          pr_debug("VFP: rounding increment = 0x%08x\n", incr);

          /*
           * Is our rounding going to overflow?
           *
           * Unsigned wrap-around means the addition would carry out of bit
           * 31.  Make room by halving both operands, keeping the bit shifted
           * out of the significand as a sticky bit, and bump the exponent.
           */
          if ((significand + incr) < significand) {
                  exponent += 1;
                  significand = (significand >> 1) | (significand & 1);
                  incr >>= 1;
  #ifdef DEBUG
                  vs->exponent = exponent;
                  vs->significand = significand;
                  vfp_single_dump("pack: overflow", vs);
  #endif
          }

          /*
           * If any of the low bits (which will be shifted out of the
           * number) are non-zero, the result is inexact.
           */
          if (significand & ((1 << (VFP_SINGLE_LOW_BITS + 1)) - 1))
                  exceptions |= FPSCR_IXC;

          /*
           * Do our rounding.
           */
          significand += incr;

          /*
           * Infinity?
           *
           * On overflow a zero increment (the rounding mode truncates for
           * this sign) gives exponent 253 with significand 0x7fffffff; any
           * other increment gives infinity.
           */
          if (exponent >= 254) {
                  exceptions |= FPSCR_OFC | FPSCR_IXC;
                  if (incr == 0) {
                          vs->exponent = 253;
                          vs->significand = 0x7fffffff;
                  } else {
                          vs->exponent = 255;             /* infinity */
                          vs->significand = 0;
                  }
          } else {
                  /* Nothing left above the discarded bits: exponent is 0. */
                  if (significand >> (VFP_SINGLE_LOW_BITS + 1) == 0)
                          exponent = 0;
                  /*
                   * Underflow is reported only if the exponent is still 0
                   * and the rounded significand does not exceed 0x80000000.
                   */
                  if (exponent || significand > 0x80000000)
                          underflow = 0;
                  if (underflow)
                          exceptions |= FPSCR_UFC;
                  vs->exponent = exponent;
                  vs->significand = significand >> 1;
          }

   pack:
          vfp_single_dump("pack: final", vs);
          {
                  s32 d = vfp_single_pack(vs);
  #ifdef DEBUG
                  pr_debug("VFP: %s: d(s%d)=%08x exceptions=%08x\n", func,
                           sd, d, exceptions);
  #endif
                  vfp_put_float(d, sd);
          }

          return exceptions;
  }
 210
 211 /*
 212  * Propagate the NaN, setting exceptions if it is signalling.
 213  * 'n' is always a NaN.  'm' may be a number, NaN or infinity.
 214  */
 215 static u32
 216 vfp_propagate_nan(struct vfp_single *vsd, struct vfp_single *vsn,
 217                   struct vfp_single *vsm, u32 fpscr)
 218 {
 219         struct vfp_single *nan;
 220         int tn, tm = 0;
 221
 222         tn = vfp_single_type(vsn);
 223
 224         if (vsm)
 225                 tm = vfp_single_type(vsm);
 226
 227         if (fpscr & FPSCR_DEFAULT_NAN)
 228                 /*
 229                  * Default NaN mode - always returns a quiet NaN
 230                  */
 231                 nan = &vfp_single_default_qnan;
 232         else {
 233                 /*
 234                  * Contemporary mode - select the first signalling
 235                  * NAN, or if neither are signalling, the first
 236                  * quiet NAN.
 237                  */
 238                 if (tn == VFP_SNAN || (tm != VFP_SNAN && tn == VFP_QNAN))
 239                         nan = vsn;
 240                 else
 241                         nan = vsm;
 242                 /*
 243                  * Make the NaN quiet.
 244                  */
 245                 nan->significand |= VFP_SINGLE_SIGNIFICAND_QNAN;
 246         }
 247
 248         *vsd = *nan;
 249
 250         /*
 251          * If one was a signalling NAN, raise invalid operation.
 252          */
 253         return tn == VFP_SNAN || tm == VFP_SNAN ? FPSCR_IOC : VFP_NAN_FLAG;
 254 }
 255
 256
 257 /*
 258  * Extended operations
 259  */
 260 static u32 vfp_single_fabs(int sd, int unused, s32 m, u32 fpscr)
 261 {
 262         vfp_put_float(vfp_single_packed_abs(m), sd);
 263         return 0;
 264 }
 265
 266 static u32 vfp_single_fcpy(int sd, int unused, s32 m, u32 fpscr)
 267 {
 268         vfp_put_float(m, sd);
 269         return 0;
 270 }
 271
 272 static u32 vfp_single_fneg(int sd, int unused, s32 m, u32 fpscr)
 273 {
 274         vfp_put_float(vfp_single_packed_negate(m), sd);
 275         return 0;
 276 }
 277
 278 static const u16 sqrt_oddadjust[] = {
 279         0x0004, 0x0022, 0x005d, 0x00b1, 0x011d, 0x019f, 0x0236, 0x02e0,
 280         0x039c, 0x0468, 0x0545, 0x0631, 0x072b, 0x0832, 0x0946, 0x0a67
 281 };
 282
 283 static const u16 sqrt_evenadjust[] = {
 284         0x0a2d, 0x08af, 0x075a, 0x0629, 0x051a, 0x0429, 0x0356, 0x029e,
 285         0x0200, 0x0179, 0x0109, 0x00af, 0x0068, 0x0034, 0x0012, 0x0002
 286 };
 287
 288 u32 vfp_estimate_sqrt_significand(u32 exponent, u32 significand)
 289 {
 290         int index;
 291         u32 z, a;
 292
 293         if ((significand & 0xc0000000) != 0x40000000) {
 294                 printk(KERN_WARNING "VFP: estimate_sqrt: invalid significand\n");
 295         }
 296
 297         a = significand << 1;
 298         index = (a >> 27) & 15;
 299         if (exponent & 1) {
 300                 z = 0x4000 + (a >> 17) - sqrt_oddadjust[index];
 301                 z = ((a / z) << 14) + (z << 15);
 302                 a >>= 1;
 303         } else {
 304                 z = 0x8000 + (a >> 17) - sqrt_evenadjust[index];
 305                 z = a / z + z;
 306                 z = (z >= 0x20000) ? 0xffff8000 : (z << 15);
 307                 if (z <= a)
 308                         return (s32)a >> 1;
 309         }
 310         {
 311                 u64 v = (u64)a << 31;
 312                 do_div(v, z);
 313                 return v + (z >> 1);
 314         }
 315 }
 316
 /*
  * FSQRT: compute the square root of the packed value 'm' and store the
  * result in register 'sd'.
  *
  * NaNs go through vfp_propagate_nan(); +infinity and +/-0 are copied to the
  * destination unchanged; -infinity and negative numbers produce the default
  * quiet NaN with FPSCR_IOC.  Everything else is estimated, corrected and
  * handed to vfp_single_normaliseround().
  *
  * Returns the exception flags for the operation.
  */
 static u32 vfp_single_fsqrt(int sd, int unused, s32 m, u32 fpscr)
 {
         struct vfp_single vsm, vsd;
         int ret, tm;

         vfp_single_unpack(&vsm, m);
         tm = vfp_single_type(&vsm);
         if (tm & (VFP_NAN|VFP_INFINITY)) {
                 struct vfp_single *vsp = &vsd;

                 if (tm & VFP_NAN)
                         ret = vfp_propagate_nan(vsp, &vsm, NULL, fpscr);
                 else if (vsm.sign == 0) {
                         /* Also the target of "goto sqrt_copy" for zeros. */
  sqrt_copy:
                         vsp = &vsm;
                         ret = 0;
                 } else {
                         /* Also the target of "goto sqrt_invalid" below. */
  sqrt_invalid:
                         vsp = &vfp_single_default_qnan;
                         ret = FPSCR_IOC;
                 }
                 vfp_put_float(vfp_single_pack(vsp), sd);
                 return ret;
         }

         /*
          * sqrt(+/- 0) == +/- 0
          */
         if (tm & VFP_ZERO)
                 goto sqrt_copy;

         /*
          * Normalise a denormalised number
          */
         if (tm & VFP_DENORMAL)
                 vfp_single_normalise_denormal(&vsm);

         /*
          * sqrt(<0) = invalid
          */
         if (vsm.sign)
                 goto sqrt_invalid;

         vfp_single_dump("sqrt", &vsm);

         /*
          * Estimate the square root.
          *
          * The unbiased exponent is halved and re-biased; the significand
          * comes from vfp_estimate_sqrt_significand() plus 2.
          */
         vsd.sign = 0;
         vsd.exponent = ((vsm.exponent - 127) >> 1) + 127;
         vsd.significand = vfp_estimate_sqrt_significand(vsm.exponent, vsm.significand) + 2;

         vfp_single_dump("sqrt estimate", &vsd);

         /*
          * And now adjust.
          *
          * The exact correction is only done when the low bits of the
          * estimate are at most 5.
          */
         if ((vsd.significand & VFP_SINGLE_LOW_BITS_MASK) <= 5) {
                 if (vsd.significand < 2) {
                         /* The "+ 2" above wrapped around: saturate. */
                         vsd.significand = 0xffffffff;
                 } else {
                         u64 term;
                         s64 rem;
                         /* Double the operand when the exponent is even. */
                         vsm.significand <<= !(vsm.exponent & 1);
                         term = (u64)vsd.significand * vsd.significand;
                         rem = ((u64)vsm.significand << 32) - term;

                         pr_debug("VFP: term=%016llx rem=%016llx\n", term, rem);

                         /*
                          * While the estimate squared is too large, step it
                          * down by one; s^2 - (s - 1)^2 == 2 * (s - 1) + 1
                          * is added back to the remainder each time.
                          */
                         while (rem < 0) {
                                 vsd.significand -= 1;
                                 rem += ((u64)vsd.significand << 1) | 1;
                         }
                         /* Non-zero remainder: set the sticky (inexact) bit. */
                         vsd.significand |= rem != 0;
                 }
         }
         vsd.significand = vfp_shiftright32jamming(vsd.significand, 1);

         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fsqrt");
 }
 397
 398 /*
 399  * Equal        := ZC
 400  * Less than    := N
 401  * Greater than := C
 402  * Unordered    := CV
 403  */
 404 static u32 vfp_compare(int sd, int signal_on_qnan, s32 m, u32 fpscr)
 405 {
 406         s32 d;
 407         u32 ret = 0;
 408
 409         d = vfp_get_float(sd);
 410         if (vfp_single_packed_exponent(m) == 255 && vfp_single_packed_mantissa(m)) {
 411                 ret |= FPSCR_C | FPSCR_V;
 412                 if (signal_on_qnan || !(vfp_single_packed_mantissa(m) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 413                         /*
 414                          * Signalling NaN, or signalling on quiet NaN
 415                          */
 416                         ret |= FPSCR_IOC;
 417         }
 418
 419         if (vfp_single_packed_exponent(d) == 255 && vfp_single_packed_mantissa(d)) {
 420                 ret |= FPSCR_C | FPSCR_V;
 421                 if (signal_on_qnan || !(vfp_single_packed_mantissa(d) & (1 << (VFP_SINGLE_MANTISSA_BITS - 1))))
 422                         /*
 423                          * Signalling NaN, or signalling on quiet NaN
 424                          */
 425                         ret |= FPSCR_IOC;
 426         }
 427
 428         if (ret == 0) {
 429                 if (d == m || vfp_single_packed_abs(d | m) == 0) {
 430                         /*
 431                          * equal
 432                          */
 433                         ret |= FPSCR_Z | FPSCR_C;
 434                 } else if (vfp_single_packed_sign(d ^ m)) {
 435                         /*
 436                          * different signs
 437                          */
 438                         if (vfp_single_packed_sign(d))
 439                                 /*
 440                                  * d is negative, so d < m
 441                                  */
 442                                 ret |= FPSCR_N;
 443                         else
 444                                 /*
 445                                  * d is positive, so d > m
 446                                  */
 447                                 ret |= FPSCR_C;
 448                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d < m)) {
 449                         /*
 450                          * d < m
 451                          */
 452                         ret |= FPSCR_N;
 453                 } else if ((vfp_single_packed_sign(d) != 0) ^ (d > m)) {
 454                         /*
 455                          * d > m
 456                          */
 457                         ret |= FPSCR_C;
 458                 }
 459         }
 460         return ret;
 461 }
 462
 463 static u32 vfp_single_fcmp(int sd, int unused, s32 m, u32 fpscr)
 464 {
 465         return vfp_compare(sd, 0, m, fpscr);
 466 }
 467
 468 static u32 vfp_single_fcmpe(int sd, int unused, s32 m, u32 fpscr)
 469 {
 470         return vfp_compare(sd, 1, m, fpscr);
 471 }
 472
 473 static u32 vfp_single_fcmpz(int sd, int unused, s32 m, u32 fpscr)
 474 {
 475         return vfp_compare(sd, 0, 0, fpscr);
 476 }
 477
 478 static u32 vfp_single_fcmpez(int sd, int unused, s32 m, u32 fpscr)
 479 {
 480         return vfp_compare(sd, 1, 0, fpscr);
 481 }
 482
 483 static u32 vfp_single_fcvtd(int dd, int unused, s32 m, u32 fpscr)
 484 {
 485         struct vfp_single vsm;
 486         struct vfp_double vdd;
 487         int tm;
 488         u32 exceptions = 0;
 489
 490         vfp_single_unpack(&vsm, m);
 491
 492         tm = vfp_single_type(&vsm);
 493
 494         /*
 495          * If we have a signalling NaN, signal invalid operation.
 496          */
 497         if (tm == VFP_SNAN)
 498                 exceptions = FPSCR_IOC;
 499
 500         if (tm & VFP_DENORMAL)
 501                 vfp_single_normalise_denormal(&vsm);
 502
 503         vdd.sign = vsm.sign;
 504         vdd.significand = (u64)vsm.significand << 32;
 505
 506         /*
 507          * If we have an infinity or NaN, the exponent must be 2047.
 508          */
 509         if (tm & (VFP_INFINITY|VFP_NAN)) {
 510                 vdd.exponent = 2047;
 511                 if (tm == VFP_QNAN)
 512                         vdd.significand |= VFP_DOUBLE_SIGNIFICAND_QNAN;
 513                 goto pack_nan;
 514         } else if (tm & VFP_ZERO)
 515                 vdd.exponent = 0;
 516         else
 517                 vdd.exponent = vsm.exponent + (1023 - 127);
 518
 519         return vfp_double_normaliseround(dd, &vdd, fpscr, exceptions, "fcvtd");
 520
 521  pack_nan:
 522         vfp_put_double(vfp_double_pack(&vdd), dd);
 523         return exceptions;
 524 }
 525
 526 static u32 vfp_single_fuito(int sd, int unused, s32 m, u32 fpscr)
 527 {
 528         struct vfp_single vs;
 529
 530         vs.sign = 0;
 531         vs.exponent = 127 + 31 - 1;
 532         vs.significand = (u32)m;
 533
 534         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fuito");
 535 }
 536
 537 static u32 vfp_single_fsito(int sd, int unused, s32 m, u32 fpscr)
 538 {
 539         struct vfp_single vs;
 540
 541         vs.sign = (m & 0x80000000) >> 16;
 542         vs.exponent = 127 + 31 - 1;
 543         vs.significand = vs.sign ? -m : m;
 544
 545         return vfp_single_normaliseround(sd, &vs, fpscr, 0, "fsito");
 546 }
 547
 548 static u32 vfp_single_ftoui(int sd, int unused, s32 m, u32 fpscr)
 549 {
 550         struct vfp_single vsm;
 551         u32 d, exceptions = 0;
 552         int rmode = fpscr & FPSCR_RMODE_MASK;
 553         int tm;
 554
 555         vfp_single_unpack(&vsm, m);
 556         vfp_single_dump("VSM", &vsm);
 557
 558         /*
 559          * Do we have a denormalised number?
 560          */
 561         tm = vfp_single_type(&vsm);
 562         if (tm & VFP_DENORMAL)
 563                 exceptions |= FPSCR_IDC;
 564
 565         if (tm & VFP_NAN)
 566                 vsm.sign = 0;
 567
 568         if (vsm.exponent >= 127 + 32) {
 569                 d = vsm.sign ? 0 : 0xffffffff;
 570                 exceptions = FPSCR_IOC;
 571         } else if (vsm.exponent >= 127 - 1) {
 572                 int shift = 127 + 31 - vsm.exponent;
 573                 u32 rem, incr = 0;
 574
 575                 /*
 576                  * 2^0 <= m < 2^32-2^8
 577                  */
 578                 d = (vsm.significand << 1) >> shift;
 579                 rem = vsm.significand << (33 - shift);
 580
 581                 if (rmode == FPSCR_ROUND_NEAREST) {
 582                         incr = 0x80000000;
 583                         if ((d & 1) == 0)
 584                                 incr -= 1;
 585                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 586                         incr = 0;
 587                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 588                         incr = ~0;
 589                 }
 590
 591                 if ((rem + incr) < rem) {
 592                         if (d < 0xffffffff)
 593                                 d += 1;
 594                         else
 595                                 exceptions |= FPSCR_IOC;
 596                 }
 597
 598                 if (d && vsm.sign) {
 599                         d = 0;
 600                         exceptions |= FPSCR_IOC;
 601                 } else if (rem)
 602                         exceptions |= FPSCR_IXC;
 603         } else {
 604                 d = 0;
 605                 if (vsm.exponent | vsm.significand) {
 606                         exceptions |= FPSCR_IXC;
 607                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 608                                 d = 1;
 609                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign) {
 610                                 d = 0;
 611                                 exceptions |= FPSCR_IOC;
 612                         }
 613                 }
 614         }
 615
 616         pr_debug("VFP: ftoui: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 617
 618         vfp_put_float(d, sd);
 619
 620         return exceptions;
 621 }
 622
 623 static u32 vfp_single_ftouiz(int sd, int unused, s32 m, u32 fpscr)
 624 {
 625         return vfp_single_ftoui(sd, unused, m, FPSCR_ROUND_TOZERO);
 626 }
 627
 628 static u32 vfp_single_ftosi(int sd, int unused, s32 m, u32 fpscr)
 629 {
 630         struct vfp_single vsm;
 631         u32 d, exceptions = 0;
 632         int rmode = fpscr & FPSCR_RMODE_MASK;
 633         int tm;
 634
 635         vfp_single_unpack(&vsm, m);
 636         vfp_single_dump("VSM", &vsm);
 637
 638         /*
 639          * Do we have a denormalised number?
 640          */
 641         tm = vfp_single_type(&vsm);
 642         if (vfp_single_type(&vsm) & VFP_DENORMAL)
 643                 exceptions |= FPSCR_IDC;
 644
 645         if (tm & VFP_NAN) {
 646                 d = 0;
 647                 exceptions |= FPSCR_IOC;
 648         } else if (vsm.exponent >= 127 + 32) {
 649                 /*
 650                  * m >= 2^31-2^7: invalid
 651                  */
 652                 d = 0x7fffffff;
 653                 if (vsm.sign)
 654                         d = ~d;
 655                 exceptions |= FPSCR_IOC;
 656         } else if (vsm.exponent >= 127 - 1) {
 657                 int shift = 127 + 31 - vsm.exponent;
 658                 u32 rem, incr = 0;
 659
 660                 /* 2^0 <= m <= 2^31-2^7 */
 661                 d = (vsm.significand << 1) >> shift;
 662                 rem = vsm.significand << (33 - shift);
 663
 664                 if (rmode == FPSCR_ROUND_NEAREST) {
 665                         incr = 0x80000000;
 666                         if ((d & 1) == 0)
 667                                 incr -= 1;
 668                 } else if (rmode == FPSCR_ROUND_TOZERO) {
 669                         incr = 0;
 670                 } else if ((rmode == FPSCR_ROUND_PLUSINF) ^ (vsm.sign != 0)) {
 671                         incr = ~0;
 672                 }
 673
 674                 if ((rem + incr) < rem && d < 0xffffffff)
 675                         d += 1;
 676                 if (d > 0x7fffffff + (vsm.sign != 0)) {
 677                         d = 0x7fffffff + (vsm.sign != 0);
 678                         exceptions |= FPSCR_IOC;
 679                 } else if (rem)
 680                         exceptions |= FPSCR_IXC;
 681
 682                 if (vsm.sign)
 683                         d = -d;
 684         } else {
 685                 d = 0;
 686                 if (vsm.exponent | vsm.significand) {
 687                         exceptions |= FPSCR_IXC;
 688                         if (rmode == FPSCR_ROUND_PLUSINF && vsm.sign == 0)
 689                                 d = 1;
 690                         else if (rmode == FPSCR_ROUND_MINUSINF && vsm.sign)
 691                                 d = -1;
 692                 }
 693         }
 694
 695         pr_debug("VFP: ftosi: d(s%d)=%08x exceptions=%08x\n", sd, d, exceptions);
 696
 697         vfp_put_float((s32)d, sd);
 698
 699         return exceptions;
 700 }
 701
 702 static u32 vfp_single_ftosiz(int sd, int unused, s32 m, u32 fpscr)
 703 {
 704         return vfp_single_ftosi(sd, unused, m, FPSCR_ROUND_TOZERO);
 705 }
 706
 /*
  * Table of the single-precision extended operations, with 32 slots indexed
  * by FEXT_TO_IDX(<opcode>).  Each populated slot pairs the handler for
  * that opcode with a flags word; slots not listed stay zero-initialised.
  *
  * NOTE(review): the meaning of OP_SCALAR and OP_DD is defined outside this
  * file.  OP_DD is set only on the single-to-double conversion, which is
  * also the only handler here that writes a double register - confirm that
  * this is what the flag tells the dispatcher.
  */
 static struct op fops_ext[32] = {
         [FEXT_TO_IDX(FEXT_FCPY)]        = { vfp_single_fcpy,   0 },
         [FEXT_TO_IDX(FEXT_FABS)]        = { vfp_single_fabs,   0 },
         [FEXT_TO_IDX(FEXT_FNEG)]        = { vfp_single_fneg,   0 },
         [FEXT_TO_IDX(FEXT_FSQRT)]       = { vfp_single_fsqrt,  0 },
         [FEXT_TO_IDX(FEXT_FCMP)]        = { vfp_single_fcmp,   OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FCMPE)]       = { vfp_single_fcmpe,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FCMPZ)]       = { vfp_single_fcmpz,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FCMPEZ)]      = { vfp_single_fcmpez, OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FCVT)]        = { vfp_single_fcvtd,  OP_SCALAR|OP_DD },
         [FEXT_TO_IDX(FEXT_FUITO)]       = { vfp_single_fuito,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FSITO)]       = { vfp_single_fsito,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FTOUI)]       = { vfp_single_ftoui,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FTOUIZ)]      = { vfp_single_ftouiz, OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FTOSI)]       = { vfp_single_ftosi,  OP_SCALAR },
         [FEXT_TO_IDX(FEXT_FTOSIZ)]      = { vfp_single_ftosiz, OP_SCALAR },
 };
 724
 725
 726
 727
 728
 729 static u32
 730 vfp_single_fadd_nonnumber(struct vfp_single *vsd, struct vfp_single *vsn,
 731                           struct vfp_single *vsm, u32 fpscr)
 732 {
 733         struct vfp_single *vsp;
 734         u32 exceptions = 0;
 735         int tn, tm;
 736
 737         tn = vfp_single_type(vsn);
 738         tm = vfp_single_type(vsm);
 739
 740         if (tn & tm & VFP_INFINITY) {
 741                 /*
 742                  * Two infinities.  Are they different signs?
 743                  */
 744                 if (vsn->sign ^ vsm->sign) {
 745                         /*
 746                          * different signs -> invalid
 747                          */
 748                         exceptions = FPSCR_IOC;
 749                         vsp = &vfp_single_default_qnan;
 750                 } else {
 751                         /*
 752                          * same signs -> valid
 753                          */
 754                         vsp = vsn;
 755                 }
 756         } else if (tn & VFP_INFINITY && tm & VFP_NUMBER) {
 757                 /*
 758                  * One infinity and one number -> infinity
 759                  */
 760                 vsp = vsn;
 761         } else {
 762                 /*
 763                  * 'n' is a NaN of some type
 764                  */
 765                 return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 766         }
 767         *vsd = *vsp;
 768         return exceptions;
 769 }
 770
 771 static u32
 772 vfp_single_add(struct vfp_single *vsd, struct vfp_single *vsn,
 773                struct vfp_single *vsm, u32 fpscr)
 774 {
 775         u32 exp_diff, m_sig;
 776
 777         if (vsn->significand & 0x80000000 ||
 778             vsm->significand & 0x80000000) {
 779                 pr_info("VFP: bad FP values in %s\n", __func__);
 780                 vfp_single_dump("VSN", vsn);
 781                 vfp_single_dump("VSM", vsm);
 782         }
 783
 784         /*
 785          * Ensure that 'n' is the largest magnitude number.  Note that
 786          * if 'n' and 'm' have equal exponents, we do not swap them.
 787          * This ensures that NaN propagation works correctly.
 788          */
 789         if (vsn->exponent < vsm->exponent) {
 790                 struct vfp_single *t = vsn;
 791                 vsn = vsm;
 792                 vsm = t;
 793         }
 794
 795         /*
 796          * Is 'n' an infinity or a NaN?  Note that 'm' may be a number,
 797          * infinity or a NaN here.
 798          */
 799         if (vsn->exponent == 255)
 800                 return vfp_single_fadd_nonnumber(vsd, vsn, vsm, fpscr);
 801
 802         /*
 803          * We have two proper numbers, where 'vsn' is the larger magnitude.
 804          *
 805          * Copy 'n' to 'd' before doing the arithmetic.
 806          */
 807         *vsd = *vsn;
 808
 809         /*
 810          * Align both numbers.
 811          */
 812         exp_diff = vsn->exponent - vsm->exponent;
 813         m_sig = vfp_shiftright32jamming(vsm->significand, exp_diff);
 814
 815         /*
 816          * If the signs are different, we are really subtracting.
 817          */
 818         if (vsn->sign ^ vsm->sign) {
 819                 m_sig = vsn->significand - m_sig;
 820                 if ((s32)m_sig < 0) {
 821                         vsd->sign = vfp_sign_negate(vsd->sign);
 822                         m_sig = -m_sig;
 823                 } else if (m_sig == 0) {
 824                         vsd->sign = (fpscr & FPSCR_RMODE_MASK) ==
 825                                       FPSCR_ROUND_MINUSINF ? 0x8000 : 0;
 826                 }
 827         } else {
 828                 m_sig = vsn->significand + m_sig;
 829         }
 830         vsd->significand = m_sig;
 831
 832         return 0;
 833 }
 834
 835 static u32
 836 vfp_single_multiply(struct vfp_single *vsd, struct vfp_single *vsn, struct vfp_single *vsm, u32 fpscr)
 837 {
 838         vfp_single_dump("VSN", vsn);
 839         vfp_single_dump("VSM", vsm);
 840
 841         /*
 842          * Ensure that 'n' is the largest magnitude number.  Note that
 843          * if 'n' and 'm' have equal exponents, we do not swap them.
 844          * This ensures that NaN propagation works correctly.
 845          */
 846         if (vsn->exponent < vsm->exponent) {
 847                 struct vfp_single *t = vsn;
 848                 vsn = vsm;
 849                 vsm = t;
 850                 pr_debug("VFP: swapping M <-> N\n");
 851         }
 852
 853         vsd->sign = vsn->sign ^ vsm->sign;
 854
 855         /*
 856          * If 'n' is an infinity or NaN, handle it.  'm' may be anything.
 857          */
 858         if (vsn->exponent == 255) {
 859                 if (vsn->significand || (vsm->exponent == 255 && vsm->significand))
 860                         return vfp_propagate_nan(vsd, vsn, vsm, fpscr);
 861                 if ((vsm->exponent | vsm->significand) == 0) {
 862                         *vsd = vfp_single_default_qnan;
 863                         return FPSCR_IOC;
 864                 }
 865                 vsd->exponent = vsn->exponent;
 866                 vsd->significand = 0;
 867                 return 0;
 868         }
 869
 870         /*
 871          * If 'm' is zero, the result is always zero.  In this case,
 872          * 'n' may be zero or a number, but it doesn't matter which.
 873          */
 874         if ((vsm->exponent | vsm->significand) == 0) {
 875                 vsd->exponent = 0;
 876                 vsd->significand = 0;
 877                 return 0;
 878         }
 879
 880         /*
 881          * We add 2 to the destination exponent for the same reason as
 882          * the addition case - though this time we have +1 from each
 883          * input operand.
 884          */
 885         vsd->exponent = vsn->exponent + vsm->exponent - 127 + 2;
 886         vsd->significand = vfp_hi64to32jamming((u64)vsn->significand * vsm->significand);
 887
 888         vfp_single_dump("VSD", vsd);
 889         return 0;
 890 }
 891
 892 #define NEG_MULTIPLY    (1 << 0)
 893 #define NEG_SUBTRACT    (1 << 1)
 894
 895 static u32
 896 vfp_single_multiply_accumulate(int sd, int sn, s32 m, u32 fpscr, u32 negate, char *func)
 897 {
 898         struct vfp_single vsd, vsp, vsn, vsm;
 899         u32 exceptions;
 900         s32 v;
 901
 902         v = vfp_get_float(sn);
 903         pr_debug("VFP: s%u = %08x\n", sn, v);
 904         vfp_single_unpack(&vsn, v);
 905         if (vsn.exponent == 0 && vsn.significand)
 906                 vfp_single_normalise_denormal(&vsn);
 907
 908         vfp_single_unpack(&vsm, m);
 909         if (vsm.exponent == 0 && vsm.significand)
 910                 vfp_single_normalise_denormal(&vsm);
 911
 912         exceptions = vfp_single_multiply(&vsp, &vsn, &vsm, fpscr);
 913         if (negate & NEG_MULTIPLY)
 914                 vsp.sign = vfp_sign_negate(vsp.sign);
 915
 916         v = vfp_get_float(sd);
 917         pr_debug("VFP: s%u = %08x\n", sd, v);
 918         vfp_single_unpack(&vsn, v);
 919         if (negate & NEG_SUBTRACT)
 920                 vsn.sign = vfp_sign_negate(vsn.sign);
 921
 922         exceptions |= vfp_single_add(&vsd, &vsn, &vsp, fpscr);
 923
 924         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, func);
 925 }
 926
 927 /*
 928  * Standard operations
 929  */
 930
 931 /*
 932  * sd = sd + (sn * sm)
 933  */
 934 static u32 vfp_single_fmac(int sd, int sn, s32 m, u32 fpscr)
 935 {
 936         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, 0, "fmac");
 937 }
 938
 939 /*
 940  * sd = sd - (sn * sm)
 941  */
 942 static u32 vfp_single_fnmac(int sd, int sn, s32 m, u32 fpscr)
 943 {
 944         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_MULTIPLY, "fnmac");
 945 }
 946
 947 /*
 948  * sd = -sd + (sn * sm)
 949  */
 950 static u32 vfp_single_fmsc(int sd, int sn, s32 m, u32 fpscr)
 951 {
 952         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT, "fmsc");
 953 }
 954
 955 /*
 956  * sd = -sd - (sn * sm)
 957  */
 958 static u32 vfp_single_fnmsc(int sd, int sn, s32 m, u32 fpscr)
 959 {
 960         return vfp_single_multiply_accumulate(sd, sn, m, fpscr, NEG_SUBTRACT | NEG_MULTIPLY, "fnmsc");
 961 }
 962
 963 /*
 964  * sd = sn * sm
 965  */
 966 static u32 vfp_single_fmul(int sd, int sn, s32 m, u32 fpscr)
 967 {
 968         struct vfp_single vsd, vsn, vsm;
 969         u32 exceptions;
 970         s32 n = vfp_get_float(sn);
 971
 972         pr_debug("VFP: s%u = %08x\n", sn, n);
 973
 974         vfp_single_unpack(&vsn, n);
 975         if (vsn.exponent == 0 && vsn.significand)
 976                 vfp_single_normalise_denormal(&vsn);
 977
 978         vfp_single_unpack(&vsm, m);
 979         if (vsm.exponent == 0 && vsm.significand)
 980                 vfp_single_normalise_denormal(&vsm);
 981
 982         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
 983         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fmul");
 984 }
 985
 986 /*
 987  * sd = -(sn * sm)
 988  */
 989 static u32 vfp_single_fnmul(int sd, int sn, s32 m, u32 fpscr)
 990 {
 991         struct vfp_single vsd, vsn, vsm;
 992         u32 exceptions;
 993         s32 n = vfp_get_float(sn);
 994
 995         pr_debug("VFP: s%u = %08x\n", sn, n);
 996
 997         vfp_single_unpack(&vsn, n);
 998         if (vsn.exponent == 0 && vsn.significand)
 999                 vfp_single_normalise_denormal(&vsn);
1000
1001         vfp_single_unpack(&vsm, m);
1002         if (vsm.exponent == 0 && vsm.significand)
1003                 vfp_single_normalise_denormal(&vsm);
1004
1005         exceptions = vfp_single_multiply(&vsd, &vsn, &vsm, fpscr);
1006         vsd.sign = vfp_sign_negate(vsd.sign);
1007         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fnmul");
1008 }
1009
1010 /*
1011  * sd = sn + sm
1012  */
1013 static u32 vfp_single_fadd(int sd, int sn, s32 m, u32 fpscr)
1014 {
1015         struct vfp_single vsd, vsn, vsm;
1016         u32 exceptions;
1017         s32 n = vfp_get_float(sn);
1018
1019         pr_debug("VFP: s%u = %08x\n", sn, n);
1020
1021         /*
1022          * Unpack and normalise denormals.
1023          */
1024         vfp_single_unpack(&vsn, n);
1025         if (vsn.exponent == 0 && vsn.significand)
1026                 vfp_single_normalise_denormal(&vsn);
1027
1028         vfp_single_unpack(&vsm, m);
1029         if (vsm.exponent == 0 && vsm.significand)
1030                 vfp_single_normalise_denormal(&vsm);
1031
1032         exceptions = vfp_single_add(&vsd, &vsn, &vsm, fpscr);
1033
1034         return vfp_single_normaliseround(sd, &vsd, fpscr, exceptions, "fadd");
1035 }
1036
1037 /*
1038  * sd = sn - sm
1039  */
1040 static u32 vfp_single_fsub(int sd, int sn, s32 m, u32 fpscr)
1041 {
1042         /*
1043          * Subtraction is addition with one sign inverted.
1044          */
1045         return vfp_single_fadd(sd, sn, vfp_single_packed_negate(m), fpscr);
1046 }
1047
1048 /*
1049  * sd = sn / sm
1050  */
1051 static u32 vfp_single_fdiv(int sd, int sn, s32 m, u32 fpscr)
1052 {
1053         struct vfp_single vsd, vsn, vsm;
1054         u32 exceptions = 0;
1055         s32 n = vfp_get_float(sn);
1056         int tm, tn;
1057
1058         pr_debug("VFP: s%u = %08x\n", sn, n);
1059
1060         vfp_single_unpack(&vsn, n);
1061         vfp_single_unpack(&vsm, m);
1062
1063         vsd.sign = vsn.sign ^ vsm.sign;
1064
1065         tn = vfp_single_type(&vsn);
1066         tm = vfp_single_type(&vsm);
1067
1068         /*
1069          * Is n a NAN?
1070          */
1071         if (tn & VFP_NAN)
1072                 goto vsn_nan;
1073
1074         /*
1075          * Is m a NAN?
1076          */
1077         if (tm & VFP_NAN)
1078                 goto vsm_nan;
1079
1080         /*
1081          * If n and m are infinity, the result is invalid
1082          * If n and m are zero, the result is invalid
1083          */
1084         if (tm & tn & (VFP_INFINITY|VFP_ZERO))
1085                 goto invalid;
1086
1087         /*
1088          * If n is infinity, the result is infinity
1089          */
1090         if (tn & VFP_INFINITY)
1091                 goto infinity;
1092
1093         /*
1094          * If m is zero, raise div0 exception
1095          */
1096         if (tm & VFP_ZERO)
1097                 goto divzero;
1098
1099         /*
1100          * If m is infinity, or n is zero, the result is zero
1101          */
1102         if (tm & VFP_INFINITY || tn & VFP_ZERO)
1103                 goto zero;
1104
1105         if (tn & VFP_DENORMAL)
1106                 vfp_single_normalise_denormal(&vsn);
1107         if (tm & VFP_DENORMAL)
1108                 vfp_single_normalise_denormal(&vsm);
1109
1110         /*
1111          * Ok, we have two numbers, we can perform division.
1112          */
1113         vsd.exponent = vsn.exponent - vsm.exponent + 127 - 1;
1114         vsm.significand <<= 1;
1115         if (vsm.significand <= (2 * vsn.significand)) {
1116                 vsn.significand >>= 1;
1117                 vsd.exponent++;
1118         }
1119         {
1120                 u64 significand = (u64)vsn.significand << 32;
1121                 do_div(significand, vsm.significand);
1122                 vsd.significand = significand;
1123         }
1124         if ((vsd.significand & 0x3f) == 0)
1125                 vsd.significand |= ((u64)vsm.significand * vsd.significand != (u64)vsn.significand << 32);
1126
1127         return vfp_single_normaliseround(sd, &vsd, fpscr, 0, "fdiv");
1128
1129  vsn_nan:
1130         exceptions = vfp_propagate_nan(&vsd, &vsn, &vsm, fpscr);
1131  pack:
1132         vfp_put_float(vfp_single_pack(&vsd), sd);
1133         return exceptions;
1134
1135  vsm_nan:
1136         exceptions = vfp_propagate_nan(&vsd, &vsm, &vsn, fpscr);
1137         goto pack;
1138
1139  zero:
1140         vsd.exponent = 0;
1141         vsd.significand = 0;
1142         goto pack;
1143
1144  divzero:
1145         exceptions = FPSCR_DZC;
1146  infinity:
1147         vsd.exponent = 255;
1148         vsd.significand = 0;
1149         goto pack;
1150
1151  invalid:
1152         vfp_put_float(vfp_single_pack(&vfp_single_default_qnan), sd);
1153         return FPSCR_IOC;
1154 }
1155
1156 static struct op fops[16] = {
1157         [FOP_TO_IDX(FOP_FMAC)]  = { vfp_single_fmac,  0 },
1158         [FOP_TO_IDX(FOP_FNMAC)] = { vfp_single_fnmac, 0 },
1159         [FOP_TO_IDX(FOP_FMSC)]  = { vfp_single_fmsc,  0 },
1160         [FOP_TO_IDX(FOP_FNMSC)] = { vfp_single_fnmsc, 0 },
1161         [FOP_TO_IDX(FOP_FMUL)]  = { vfp_single_fmul,  0 },
1162         [FOP_TO_IDX(FOP_FNMUL)] = { vfp_single_fnmul, 0 },
1163         [FOP_TO_IDX(FOP_FADD)]  = { vfp_single_fadd,  0 },
1164         [FOP_TO_IDX(FOP_FSUB)]  = { vfp_single_fsub,  0 },
1165         [FOP_TO_IDX(FOP_FDIV)]  = { vfp_single_fdiv,  0 },
1166 };
1167
1168 #define FREG_BANK(x)    ((x) & 0x18)
1169 #define FREG_IDX(x)     ((x) & 7)
1170
1171 u32 vfp_single_cpdo(u32 inst, u32 fpscr)
1172 {
1173         u32 op = inst & FOP_MASK;
1174         u32 exceptions = 0;
1175         unsigned int dest;
1176         unsigned int sn = vfp_get_sn(inst);
1177         unsigned int sm = vfp_get_sm(inst);
1178         unsigned int vecitr, veclen, vecstride;
1179         struct op *fop;
1180
1181         vecstride = 1 + ((fpscr & FPSCR_STRIDE_MASK) == FPSCR_STRIDE_MASK);
1182
1183         fop = (op == FOP_EXT) ? &fops_ext[FEXT_TO_IDX(inst)] : &fops[FOP_TO_IDX(op)];
1184
1185         /*
1186          * fcvtsd takes a dN register number as destination, not sN.
1187          * Technically, if bit 0 of dd is set, this is an invalid
1188          * instruction.  However, we ignore this for efficiency.
1189          * It also only operates on scalars.
1190          */
1191         if (fop->flags & OP_DD)
1192                 dest = vfp_get_dd(inst);
1193         else
1194                 dest = vfp_get_sd(inst);
1195
1196         /*
1197          * If destination bank is zero, vector length is always '1'.
1198          * ARM DDI0100F C5.1.3, C5.3.2.
1199          */
1200         if ((fop->flags & OP_SCALAR) || FREG_BANK(dest) == 0)
1201                 veclen = 0;
1202         else
1203                 veclen = fpscr & FPSCR_LENGTH_MASK;
1204
1205         pr_debug("VFP: vecstride=%u veclen=%u\n", vecstride,
1206                  (veclen >> FPSCR_LENGTH_BIT) + 1);
1207
1208         if (!fop->fn)
1209                 goto invalid;
1210
1211         for (vecitr = 0; vecitr <= veclen; vecitr += 1 << FPSCR_LENGTH_BIT) {
1212                 s32 m = vfp_get_float(sm);
1213                 u32 except;
1214                 char type;
1215
1216                 type = fop->flags & OP_DD ? 'd' : 's';
1217                 if (op == FOP_EXT)
1218                         pr_debug("VFP: itr%d (%c%u) = op[%u] (s%u=%08x)\n",
1219                                  vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1220                                  sm, m);
1221                 else
1222                         pr_debug("VFP: itr%d (%c%u) = (s%u) op[%u] (s%u=%08x)\n",
1223                                  vecitr >> FPSCR_LENGTH_BIT, type, dest, sn,
1224                                  FOP_TO_IDX(op), sm, m);
1225
1226                 except = fop->fn(dest, sn, m, fpscr);
1227                 pr_debug("VFP: itr%d: exceptions=%08x\n",
1228                          vecitr >> FPSCR_LENGTH_BIT, except);
1229
1230                 exceptions |= except;
1231
1232                 /*
1233                  * CHECK: It appears to be undefined whether we stop when
1234                  * we encounter an exception.  We continue.
1235                  */
1236                 dest = FREG_BANK(dest) + ((FREG_IDX(dest) + vecstride) & 7);
1237                 sn = FREG_BANK(sn) + ((FREG_IDX(sn) + vecstride) & 7);
1238                 if (FREG_BANK(sm) != 0)
1239                         sm = FREG_BANK(sm) + ((FREG_IDX(sm) + vecstride) & 7);
1240         }
1241         return exceptions;
1242
1243  invalid:
1244         return (u32)-1;
1245 }