amath/lib/dconv/dragon4.cpp

1162 lines
41 KiB
C++
Executable File

/******************************************************************************
Copyright (c) 2014 Ryan Juckett
http://www.ryanjuckett.com/
This software is provided 'as-is', without any express or implied
warranty. In no event will the authors be held liable for any damages
arising from the use of this software.
Permission is granted to anyone to use this software for any purpose,
including commercial applications, and to alter it and redistribute it
freely, subject to the following restrictions:
1. The origin of this software must not be misrepresented; you must not
claim that you wrote the original software. If you use this software
in a product, an acknowledgment in the product documentation would be
appreciated but is not required.
2. Altered source versions must be plainly marked as such, and must not be
misrepresented as being the original software.
3. This notice may not be removed or altered from any source
distribution.
*******************************************************************************
Copyright (c) 2015-2017 Carsten Sonne Larsen <cs@innolan.dk>
All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
are met:
1. Redistributions of source code must retain the above copyright
notice, this list of conditions and the following disclaimer.
2. Redistributions in binary form must reproduce the above copyright
notice, this list of conditions and the following disclaimer in the
documentation and/or other materials provided with the distribution.
THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
The origin source code can be obtained from:
http://www.ryanjuckett.com/
******************************************************************************/
#include "math.h"
#include "dmath.h"
#include "dragon4.h"
//******************************************************************************
// Maximum number of 32 bit blocks needed in high precision arithmetic
// to print out 64 bit IEEE floating point values.
//******************************************************************************
const tU32 c_BigInt_MaxBlocks = 35;
//******************************************************************************
// This structure stores a high precision unsigned integer. It uses a buffer
// of 32 bit integer blocks along with a length. The lowest bits of the integer
// are stored at the start of the buffer and the length is set to the minimum
// value that contains the integer. Thus, there are never any zero blocks at the
// end of the buffer.
//******************************************************************************
struct tBigInt
{
// Copy integer
tBigInt & operator=(const tBigInt &rhs)
{
tU32 length = rhs.m_length;
tU32 * pLhsCur = m_blocks;
for (const tU32 *pRhsCur = rhs.m_blocks, *pRhsEnd = pRhsCur + length;
pRhsCur != pRhsEnd;
++pLhsCur, ++pRhsCur)
{
*pLhsCur = *pRhsCur;
}
m_length = length;
return *this;
}
// Data accessors
tU32 GetLength() const {
return m_length;
}
tU32 GetBlock(tU32 idx) const {
return m_blocks[idx];
}
// Zero helper functions
void SetZero() {
m_length = 0;
}
tB IsZero() const {
return m_length == 0;
}
// Basic type accessors
void SetU64(tU64 val)
{
if (val > 0xFFFFFFFF)
{
m_blocks[0] = val & 0xFFFFFFFF;
m_blocks[1] = (val >> 32) & 0xFFFFFFFF;
m_length = 2;
}
else if (val != 0)
{
m_blocks[0] = val & 0xFFFFFFFF;
m_length = 1;
}
else
{
m_length = 0;
}
}
void SetU32(tU32 val)
{
if (val != 0)
{
m_blocks[0] = val;
m_length = (val != 0);
}
else
{
m_length = 0;
}
}
tU32 GetU32() const {
return (m_length == 0) ? 0 : m_blocks[0];
}
// Member data
tU32 m_length;
tU32 m_blocks[c_BigInt_MaxBlocks];
};
//******************************************************************************
// Returns 0 if (lhs = rhs), negative if (lhs < rhs), positive if (lhs > rhs)
//******************************************************************************
static tS32 BigInt_Compare(const tBigInt & lhs, const tBigInt & rhs)
{
// A bigger length implies a bigger number.
tS32 lengthDiff = lhs.m_length - rhs.m_length;
if (lengthDiff != 0)
return lengthDiff;
// Compare blocks one by one from high to low.
for (tS32 i = lhs.m_length - 1; i >= 0; --i)
{
if (lhs.m_blocks[i] == rhs.m_blocks[i])
continue;
else if (lhs.m_blocks[i] > rhs.m_blocks[i])
return 1;
else
return -1;
}
// no blocks differed
return 0;
}
//******************************************************************************
// result = lhs + rhs
//******************************************************************************
static void BigInt_Add(tBigInt * pResult, const tBigInt & lhs, const tBigInt & rhs)
{
// determine which operand has the smaller length
const tBigInt * pLarge;
const tBigInt * pSmall;
if (lhs.m_length < rhs.m_length)
{
pSmall = &lhs;
pLarge = &rhs;
}
else
{
pSmall = &rhs;
pLarge = &lhs;
}
const tU32 largeLen = pLarge->m_length;
const tU32 smallLen = pSmall->m_length;
// The output will be at least as long as the largest input
pResult->m_length = largeLen;
// Add each block and add carry the overflow to the next block
tU64 carry = 0;
const tU32 * pLargeCur = pLarge->m_blocks;
const tU32 * pLargeEnd = pLargeCur + largeLen;
const tU32 * pSmallCur = pSmall->m_blocks;
const tU32 * pSmallEnd = pSmallCur + smallLen;
tU32 * pResultCur = pResult->m_blocks;
while (pSmallCur != pSmallEnd)
{
tU64 sum = carry + (tU64)(*pLargeCur) + (tU64)(*pSmallCur);
carry = sum >> 32;
(*pResultCur) = sum & 0xFFFFFFFF;
++pLargeCur;
++pSmallCur;
++pResultCur;
}
// Add the carry to any blocks that only exist in the large operand
while (pLargeCur != pLargeEnd)
{
tU64 sum = carry + (tU64)(*pLargeCur);
carry = sum >> 32;
(*pResultCur) = sum & 0xFFFFFFFF;
++pLargeCur;
++pResultCur;
}
// If there's still a carry, append a new block
if (carry != 0)
{
RJ_ASSERT(carry == 1);
RJ_ASSERT((tU32)(pResultCur - pResult->m_blocks) == largeLen && (largeLen < c_BigInt_MaxBlocks));
*pResultCur = 1;
pResult->m_length = largeLen + 1;
}
else
{
pResult->m_length = largeLen;
}
}
//******************************************************************************
// result = lhs * rhs
//******************************************************************************
static void BigInt_Multiply(tBigInt * pResult, const tBigInt &lhs, const tBigInt &rhs)
{
RJ_ASSERT( pResult != &lhs && pResult != &rhs );
// determine which operand has the smaller length
const tBigInt * pLarge;
const tBigInt * pSmall;
if (lhs.m_length < rhs.m_length)
{
pSmall = &lhs;
pLarge = &rhs;
}
else
{
pSmall = &rhs;
pLarge = &lhs;
}
// set the maximum possible result length
tU32 maxResultLen = pLarge->m_length + pSmall->m_length;
RJ_ASSERT( maxResultLen <= c_BigInt_MaxBlocks );
// clear the result data
for(tU32 * pCur = pResult->m_blocks, *pEnd = pCur + maxResultLen; pCur != pEnd; ++pCur)
*pCur = 0;
// perform standard long multiplication
const tU32 *pLargeBeg = pLarge->m_blocks;
const tU32 *pLargeEnd = pLargeBeg + pLarge->m_length;
// for each small block
tU32 *pResultStart = pResult->m_blocks;
for(const tU32 *pSmallCur = pSmall->m_blocks, *pSmallEnd = pSmallCur + pSmall->m_length;
pSmallCur != pSmallEnd;
++pSmallCur, ++pResultStart)
{
// if non-zero, multiply against all the large blocks and add into the result
const tU32 multiplier = *pSmallCur;
if (multiplier != 0)
{
const tU32 *pLargeCur = pLargeBeg;
tU32 *pResultCur = pResultStart;
tU64 carry = 0;
do
{
tU64 product = (*pResultCur) + (*pLargeCur)*(tU64)multiplier + carry;
carry = product >> 32;
*pResultCur = product & 0xFFFFFFFF;
++pLargeCur;
++pResultCur;
} while(pLargeCur != pLargeEnd);
RJ_ASSERT(pResultCur < pResult->m_blocks + maxResultLen);
*pResultCur = (tU32)(carry & 0xFFFFFFFF);
}
}
// check if the terminating block has no set bits
if (maxResultLen > 0 && pResult->m_blocks[maxResultLen - 1] == 0)
pResult->m_length = maxResultLen-1;
else
pResult->m_length = maxResultLen;
}
//******************************************************************************
// result = lhs * rhs
//******************************************************************************
static void BigInt_Multiply(tBigInt * pResult, const tBigInt & lhs, tU32 rhs)
{
// perform long multiplication
tU32 carry = 0;
tU32 *pResultCur = pResult->m_blocks;
const tU32 *pLhsCur = lhs.m_blocks;
const tU32 *pLhsEnd = lhs.m_blocks + lhs.m_length;
for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )
{
tU64 product = (tU64)(*pLhsCur) * rhs + carry;
*pResultCur = (tU32)(product & 0xFFFFFFFF);
carry = product >> 32;
}
// if there is a remaining carry, grow the array
if (carry != 0)
{
// grow the array
RJ_ASSERT(lhs.m_length + 1 <= c_BigInt_MaxBlocks);
*pResultCur = (tU32)carry;
pResult->m_length = lhs.m_length + 1;
}
else
{
pResult->m_length = lhs.m_length;
}
}
//******************************************************************************
// result = in * 2
//******************************************************************************
static void BigInt_Multiply2(tBigInt * pResult, const tBigInt &in)
{
// shift all the blocks by one
tU32 carry = 0;
tU32 *pResultCur = pResult->m_blocks;
const tU32 *pLhsCur = in.m_blocks;
const tU32 *pLhsEnd = in.m_blocks + in.m_length;
for ( ; pLhsCur != pLhsEnd; ++pLhsCur, ++pResultCur )
{
tU32 cur = *pLhsCur;
*pResultCur = (cur << 1) | carry;
carry = cur >> 31;
}
if (carry != 0)
{
// grow the array
RJ_ASSERT(in.m_length + 1 <= c_BigInt_MaxBlocks);
*pResultCur = carry;
pResult->m_length = in.m_length + 1;
}
else
{
pResult->m_length = in.m_length;
}
}
//******************************************************************************
// result = result * 2
//******************************************************************************
static void BigInt_Multiply2(tBigInt * pResult)
{
// shift all the blocks by one
tU32 carry = 0;
tU32 *pCur = pResult->m_blocks;
tU32 *pEnd = pResult->m_blocks + pResult->m_length;
for ( ; pCur != pEnd; ++pCur )
{
tU32 cur = *pCur;
*pCur = (cur << 1) | carry;
carry = cur >> 31;
}
if (carry != 0)
{
// grow the array
RJ_ASSERT(pResult->m_length + 1 <= c_BigInt_MaxBlocks);
*pCur = carry;
++pResult->m_length;
}
}
//******************************************************************************
// result = result * 10
//******************************************************************************
static void BigInt_Multiply10(tBigInt * pResult)
{
// multiply all the blocks
tU64 carry = 0;
tU32 *pCur = pResult->m_blocks;
tU32 *pEnd = pResult->m_blocks + pResult->m_length;
for ( ; pCur != pEnd; ++pCur )
{
tU64 product = (tU64)(*pCur) * 10ull + carry;
(*pCur) = (tU32)(product & 0xFFFFFFFF);
carry = product >> 32;
}
if (carry != 0)
{
// grow the array
RJ_ASSERT(pResult->m_length + 1 <= c_BigInt_MaxBlocks);
*pCur = (tU32)carry;
++pResult->m_length;
}
}
//******************************************************************************
//******************************************************************************
static tU32 g_PowerOf10_U32[] =
{
1, // 10 ^ 0
10, // 10 ^ 1
100, // 10 ^ 2
1000, // 10 ^ 3
10000, // 10 ^ 4
100000, // 10 ^ 5
1000000, // 10 ^ 6
10000000, // 10 ^ 7
};
//******************************************************************************
// Note: This has a lot of wasted space in the big integer structures of the
// early table entries. It wouldn't be terribly hard to make the multiply
// function work on integer pointers with an array length instead of
// the tBigInt struct which would allow us to store a minimal amount of
// data here.
//******************************************************************************
static tBigInt g_PowerOf10_Big[] =
{
// 10 ^ 8
{ 1, { 100000000 } },
// 10 ^ 16
{ 2, { 0x6fc10000, 0x002386f2 } },
// 10 ^ 32
{ 4, { 0x00000000, 0x85acef81, 0x2d6d415b, 0x000004ee, } },
// 10 ^ 64
{ 7, { 0x00000000, 0x00000000, 0xbf6a1f01, 0x6e38ed64, 0xdaa797ed, 0xe93ff9f4, 0x00184f03, } },
// 10 ^ 128
{ 14, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x2e953e01, 0x03df9909, 0x0f1538fd,
0x2374e42f, 0xd3cff5ec, 0xc404dc08, 0xbccdb0da, 0xa6337f19, 0xe91f2603, 0x0000024e,
}
},
// 10 ^ 256
{ 27, { 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000, 0x00000000,
0x00000000, 0x982e7c01, 0xbed3875b, 0xd8d99f72, 0x12152f87, 0x6bde50c6, 0xcf4a6e70,
0xd595d80f, 0x26b2716e, 0xadc666b0, 0x1d153624, 0x3c42d35a, 0x63ff540e, 0xcc5573c0,
0x65f9ef17, 0x55bc28f2, 0x80dcc7f7, 0xf46eeddc, 0x5fdcefce, 0x000553f7,
}
}
};
//******************************************************************************
// result = 10^exponent
//******************************************************************************
static void BigInt_Pow10(tBigInt * pResult, tU32 exponent)
{
// make sure the exponent is within the bounds of the lookup table data
RJ_ASSERT(exponent < 512);
// create two temporary values to reduce large integer copy operations
tBigInt temp1;
tBigInt temp2;
tBigInt *pCurTemp = &temp1;
tBigInt *pNextTemp = &temp2;
// initialize the result by looking up a 32-bit power of 10 corresponding to the first 3 bits
tU32 smallExponent = exponent & 0x7;
pCurTemp->SetU32(g_PowerOf10_U32[smallExponent]);
// remove the low bits that we used for the 32-bit lookup table
exponent >>= 3;
tU32 tableIdx = 0;
// while there are remaining bits in the exponent to be processed
while (exponent != 0)
{
// if the current bit is set, multiply it with the corresponding power of 10
if(exponent & 1)
{
// multiply into the next temporary
BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );
// swap to the next temporary
tBigInt * pSwap = pCurTemp;
pCurTemp = pNextTemp;
pNextTemp = pSwap;
}
// advance to the next bit
++tableIdx;
exponent >>= 1;
}
// output the result
*pResult = *pCurTemp;
}
//******************************************************************************
// result = in * 10^exponent
//******************************************************************************
static void BigInt_MultiplyPow10(tBigInt * pResult, const tBigInt & in, tU32 exponent)
{
// make sure the exponent is within the bounds of the lookup table data
RJ_ASSERT(exponent < 512);
// create two temporary values to reduce large integer copy operations
tBigInt temp1;
tBigInt temp2;
tBigInt *pCurTemp = &temp1;
tBigInt *pNextTemp = &temp2;
// initialize the result by looking up a 32-bit power of 10 corresponding to the first 3 bits
tU32 smallExponent = exponent & 0x7;
if (smallExponent != 0)
{
BigInt_Multiply( pCurTemp, in, g_PowerOf10_U32[smallExponent] );
}
else
{
*pCurTemp = in;
}
// remove the low bits that we used for the 32-bit lookup table
exponent >>= 3;
tU32 tableIdx = 0;
// while there are remaining bits in the exponent to be processed
while (exponent != 0)
{
// if the current bit is set, multiply it with the corresponding power of 10
if(exponent & 1)
{
// multiply into the next temporary
BigInt_Multiply( pNextTemp, *pCurTemp, g_PowerOf10_Big[tableIdx] );
// swap to the next temporary
tBigInt * pSwap = pCurTemp;
pCurTemp = pNextTemp;
pNextTemp = pSwap;
}
// advance to the next bit
++tableIdx;
exponent >>= 1;
}
// output the result
*pResult = *pCurTemp;
}
//******************************************************************************
// result = 2^exponent
//******************************************************************************
static inline void BigInt_Pow2(tBigInt * pResult, tU32 exponent)
{
tU32 blockIdx = exponent / 32;
RJ_ASSERT( blockIdx < c_BigInt_MaxBlocks );
for ( tU32 i = 0; i <= blockIdx; ++i)
pResult->m_blocks[i] = 0;
pResult->m_length = blockIdx + 1;
tU32 bitIdx = (exponent % 32);
pResult->m_blocks[blockIdx] |= (1 << bitIdx);
}
//******************************************************************************
// This function will divide two large numbers under the assumption that the
// result is within the range [0,10) and the input numbers have been shifted
// to satisfy:
// - The highest block of the divisor is greater than or equal to 8 such that
// there is enough precision to make an accurate first guess at the quotient.
// - The highest block of the divisor is less than the maximum value on an
// unsigned 32-bit integer such that we can safely increment without overflow.
// - The dividend does not contain more blocks than the divisor such that we
// can estimate the quotient by dividing the equivalently placed high blocks.
//
// quotient = floor(dividend / divisor)
// remainder = dividend - quotient*divisor
//
// pDividend is updated to be the remainder and the quotient is returned.
//******************************************************************************
static tU32 BigInt_DivideWithRemainder_MaxQuotient9(tBigInt * pDividend, const tBigInt & divisor)
{
// Check that the divisor has been correctly shifted into range and that it is not
// smaller than the dividend in length.
RJ_ASSERT( !divisor.IsZero() &&
divisor.m_blocks[divisor.m_length-1] >= 8 &&
divisor.m_blocks[divisor.m_length-1] < 0xFFFFFFFF &&
pDividend->m_length <= divisor.m_length );
// If the dividend is smaller than the divisor, the quotient is zero and the divisor is already
// the remainder.
tU32 length = divisor.m_length;
if (pDividend->m_length < divisor.m_length)
return 0;
const tU32 * pFinalDivisorBlock = divisor.m_blocks + length - 1;
tU32 * pFinalDividendBlock = pDividend->m_blocks + length - 1;
// Compute an estimated quotient based on the high block value. This will either match the actual quotient or
// undershoot by one.
tU32 quotient = *pFinalDividendBlock / (*pFinalDivisorBlock + 1);
RJ_ASSERT(quotient <= 9);
// Divide out the estimated quotient
if (quotient != 0)
{
// dividend = dividend - divisor*quotient
const tU32 *pDivisorCur = divisor.m_blocks;
tU32 *pDividendCur = pDividend->m_blocks;
tU64 borrow = 0;
tU64 carry = 0;
do
{
tU64 product = (tU64)*pDivisorCur * (tU64)quotient + carry;
carry = product >> 32;
tU64 difference = (tU64)*pDividendCur - (product & 0xFFFFFFFF) - borrow;
borrow = (difference >> 32) & 1;
*pDividendCur = difference & 0xFFFFFFFF;
++pDivisorCur;
++pDividendCur;
} while(pDivisorCur <= pFinalDivisorBlock);
// remove all leading zero blocks from dividend
while (length > 0 && pDividend->m_blocks[length - 1] == 0)
--length;
pDividend->m_length = length;
}
// If the dividend is still larger than the divisor, we overshot our estimate quotient. To correct,
// we increment the quotient and subtract one more divisor from the dividend.
if ( BigInt_Compare(*pDividend, divisor) >= 0 )
{
++quotient;
// dividend = dividend - divisor
const tU32 *pDivisorCur = divisor.m_blocks;
tU32 *pDividendCur = pDividend->m_blocks;
tU64 borrow = 0;
do
{
tU64 difference = (tU64)*pDividendCur - (tU64)*pDivisorCur - borrow;
borrow = (difference >> 32) & 1;
*pDividendCur = difference & 0xFFFFFFFF;
++pDivisorCur;
++pDividendCur;
} while(pDivisorCur <= pFinalDivisorBlock);
// remove all leading zero blocks from dividend
while (length > 0 && pDividend->m_blocks[length - 1] == 0)
--length;
pDividend->m_length = length;
}
return quotient;
}
//******************************************************************************
// result = result << shift
//******************************************************************************
static void BigInt_ShiftLeft(tBigInt * pResult, tU32 shift)
{
RJ_ASSERT( shift != 0 );
tU32 shiftBlocks = shift / 32;
tU32 shiftBits = shift % 32;
// process blocks high to low so that we can safely process in place
const tU32 * pInBlocks = pResult->m_blocks;
tS32 inLength = pResult->m_length;
RJ_ASSERT( inLength + shiftBlocks < c_BigInt_MaxBlocks );
// check if the shift is block aligned
if (shiftBits == 0)
{
// copy blcoks from high to low
for (tU32 * pInCur = pResult->m_blocks + inLength, *pOutCur = pInCur + shiftBlocks;
pInCur >= pInBlocks;
--pInCur, --pOutCur)
{
*pOutCur = *pInCur;
}
// zero the remaining low blocks
for ( tU32 i = 0; i < shiftBlocks; ++i)
pResult->m_blocks[i] = 0;
pResult->m_length += shiftBlocks;
}
// else we need to shift partial blocks
else
{
tS32 inBlockIdx = inLength - 1;
tU32 outBlockIdx = inLength + shiftBlocks;
// set the length to hold the shifted blocks
RJ_ASSERT( outBlockIdx < c_BigInt_MaxBlocks );
pResult->m_length = outBlockIdx + 1;
// output the initial blocks
const tU32 lowBitsShift = (32 - shiftBits);
tU32 highBits = 0;
tU32 block = pResult->m_blocks[inBlockIdx];
tU32 lowBits = block >> lowBitsShift;
while ( inBlockIdx > 0 )
{
pResult->m_blocks[outBlockIdx] = highBits | lowBits;
highBits = block << shiftBits;
--inBlockIdx;
--outBlockIdx;
block = pResult->m_blocks[inBlockIdx];
lowBits = block >> lowBitsShift;
}
// output the final blocks
RJ_ASSERT( outBlockIdx == shiftBlocks + 1 );
pResult->m_blocks[outBlockIdx] = highBits | lowBits;
pResult->m_blocks[outBlockIdx-1] = block << shiftBits;
// zero the remaining low blocks
for ( tU32 i = 0; i < shiftBlocks; ++i)
pResult->m_blocks[i] = 0;
// check if the terminating block has no set bits
if (pResult->m_blocks[pResult->m_length - 1] == 0)
--pResult->m_length;
}
}
//******************************************************************************
// This is an implementation the Dragon4 algorithm to convert a binary number
// in floating point format to a decimal number in string format. The function
// returns the number of digits written to the output buffer and the output is
// not NUL terminated.
//
// The floating point input value is (mantissa * 2^exponent).
//
// See the following papers for more information on the algorithm:
// "How to Print Floating-Point Numbers Accurately"
// Steele and White
// http://kurtstephens.com/files/p372-steele.pdf
// "Printing Floating-Point Numbers Quickly and Accurately"
// Burger and Dybvig
// http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf
//******************************************************************************
tU32 Dragon4
(
const tU64 mantissa, // value significand
const tS32 exponent, // value exponent in base 2
const tU32 mantissaHighBitIdx, // index of the highest set mantissa bit
const tB hasUnequalMargins, // is the high margin twice as large as the low margin
const tCutoffMode cutoffMode, // how to determine output length
tU32 cutoffNumber, // parameter to the selected cutoffMode
tC8 * pOutBuffer, // buffer to output into
tU32 bufferSize, // maximum characters that can be printed to pOutBuffer
tS32 * pOutExponent // the base 10 exponent of the first digit
)
{
tC8 * pCurDigit = pOutBuffer;
RJ_ASSERT( bufferSize > 0 );
// if the mantissa is zero, the value is zero regardless of the exponent
if (mantissa == 0)
{
*pCurDigit = '0';
*pOutExponent = 0;
return 1;
}
// compute the initial state in integral form such that
// value = scaledValue / scale
// marginLow = scaledMarginLow / scale
tBigInt scale; // positive scale applied to value and margin such that they can be
// represented as whole numbers
tBigInt scaledValue; // scale * mantissa
tBigInt scaledMarginLow; // scale * 0.5 * (distance between this floating-point number and its
// immediate lower value)
// For normalized IEEE floating point values, each time the exponent is incremented the margin also
// doubles. That creates a subset of transition numbers where the high margin is twice the size of
// the low margin.
tBigInt * pScaledMarginHigh;
tBigInt optionalMarginHigh;
if ( hasUnequalMargins )
{
// if we have no fractional component
if (exponent > 0)
{
// 1) Expand the input value by multiplying out the mantissa and exponent. This represents
// the input value in its whole number representation.
// 2) Apply an additional scale of 2 such that later comparisons against the margin values
// are simplified.
// 3) Set the margin value to the lowest mantissa bit's scale.
// scaledValue = 2 * 2 * mantissa*2^exponent
scaledValue.SetU64( 4 * mantissa );
BigInt_ShiftLeft( &scaledValue, exponent );
// scale = 2 * 2 * 1
scale.SetU32( 4 );
// scaledMarginLow = 2 * 2^(exponent-1)
BigInt_Pow2( &scaledMarginLow, exponent );
// scaledMarginHigh = 2 * 2 * 2^(exponent-1)
BigInt_Pow2( &optionalMarginHigh, exponent + 1 );
}
// else we have a fractional exponent
else
{
// In order to track the mantissa data as an integer, we store it as is with a large scale
// scaledValue = 2 * 2 * mantissa
scaledValue.SetU64( 4 * mantissa );
// scale = 2 * 2 * 2^(-exponent)
BigInt_Pow2(&scale, -exponent + 2 );
// scaledMarginLow = 2 * 2^(-1)
scaledMarginLow.SetU32( 1 );
// scaledMarginHigh = 2 * 2 * 2^(-1)
optionalMarginHigh.SetU32( 2 );
}
// the high and low margins are different
pScaledMarginHigh = &optionalMarginHigh;
}
else
{
// if we have no fractional component
if (exponent > 0)
{
// 1) Expand the input value by multiplying out the mantissa and exponent. This represents
// the input value in its whole number representation.
// 2) Apply an additional scale of 2 such that later comparisons against the margin values
// are simplified.
// 3) Set the margin value to the lowest mantissa bit's scale.
// scaledValue = 2 * mantissa*2^exponent
scaledValue.SetU64( 2 * mantissa );
BigInt_ShiftLeft( &scaledValue, exponent );
// scale = 2 * 1
scale.SetU32( 2 );
// scaledMarginLow = 2 * 2^(exponent-1)
BigInt_Pow2( &scaledMarginLow, exponent );
}
// else we have a fractional exponent
else
{
// In order to track the mantissa data as an integer, we store it as is with a large scale
// scaledValue = 2 * mantissa
scaledValue.SetU64( 2 * mantissa );
// scale = 2 * 2^(-exponent)
BigInt_Pow2(&scale, -exponent + 1 );
// scaledMarginLow = 2 * 2^(-1)
scaledMarginLow.SetU32( 1 );
}
// the high and low margins are equal
pScaledMarginHigh = &scaledMarginLow;
}
// Compute an estimate for digitExponent that will be correct or undershoot by one.
// This optimization is based on the paper "Printing Floating-Point Numbers Quickly and Accurately"
// by Burger and Dybvig http://citeseerx.ist.psu.edu/viewdoc/download?doi=10.1.1.72.4656&rep=rep1&type=pdf
// We perform an additional subtraction of 0.69 to increase the frequency of a failed estimate
// because that lets us take a faster branch in the code. 0.69 is chosen because 0.69 + log10(2) is
// less than one by a reasonable epsilon that will account for any floating point error.
//
// We want to set digitExponent to floor(log10(v)) + 1
// v = mantissa*2^exponent
// log2(v) = log2(mantissa) + exponent;
// log10(v) = log2(v) * log10(2)
// floor(log2(v)) = mantissaHighBitIdx + exponent;
// log10(v) - log10(2) < (mantissaHighBitIdx + exponent) * log10(2) <= log10(v)
// log10(v) < (mantissaHighBitIdx + exponent) * log10(2) + log10(2) <= log10(v) + log10(2)
// floor( log10(v) ) < ceil( (mantissaHighBitIdx + exponent) * log10(2) ) <= floor( log10(v) ) + 1
const tF64 log10_2 = 0.30102999566398119521373889472449;
tS32 digitExponent = (tS32)(ceil(tF64((tS32)mantissaHighBitIdx + exponent) * log10_2 - 0.69));
// if the digit exponent is smaller than the smallest desired digit for fractional cutoff,
// pull the digit back into legal range at which point we will round to the appropriate value.
// Note that while our value for digitExponent is still an estimate, this is safe because it
// only increases the number. This will either correct digitExponent to an accurate value or it
// will clamp it above the accurate value.
if (cutoffMode == CutoffMode_FractionLength && digitExponent <= -(tS32)cutoffNumber)
{
digitExponent = -(tS32)cutoffNumber + 1;
}
// Divide value by 10^digitExponent.
if (digitExponent > 0)
{
// The exponent is positive creating a division so we multiply up the scale.
tBigInt temp;
BigInt_MultiplyPow10( &temp, scale, digitExponent );
scale = temp;
}
else if (digitExponent < 0)
{
// The exponent is negative creating a multiplication so we multiply up the scaledValue,
// scaledMarginLow and scaledMarginHigh.
tBigInt pow10;
BigInt_Pow10( &pow10, -digitExponent);
tBigInt temp;
BigInt_Multiply( &temp, scaledValue, pow10);
scaledValue = temp;
BigInt_Multiply( &temp, scaledMarginLow, pow10);
scaledMarginLow = temp;
if (pScaledMarginHigh != &scaledMarginLow)
BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
}
// If (value + marginHigh) >= 1, our estimate for digitExponent was too low
tBigInt scaledValueHigh;
BigInt_Add( &scaledValueHigh, scaledValue, *pScaledMarginHigh );
if( BigInt_Compare(scaledValueHigh,scale) >= 0 )
{
// The exponent estimate was incorrect.
// Increment the exponent and don't perform the premultiply needed
// for the first loop iteration.
digitExponent = digitExponent + 1;
}
else
{
// The exponent estimate was correct.
// Multiply larger by the output base to prepare for the first loop iteration.
BigInt_Multiply10( &scaledValue );
BigInt_Multiply10( &scaledMarginLow );
if (pScaledMarginHigh != &scaledMarginLow)
BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
}
// Compute the cutoff exponent (the exponent of the final digit to print).
// Default to the maximum size of the output buffer.
tS32 cutoffExponent = digitExponent - bufferSize;
switch(cutoffMode)
{
// print digits until we pass the accuracy margin limits or buffer size
case CutoffMode_Unique:
break;
// print cutoffNumber of digits or until we reach the buffer size
case CutoffMode_TotalLength:
{
tS32 desiredCutoffExponent = digitExponent - (tS32)cutoffNumber;
if (desiredCutoffExponent > cutoffExponent)
cutoffExponent = desiredCutoffExponent;
}
break;
// print cutoffNumber digits past the decimal point or until we reach the buffer size
case CutoffMode_FractionLength:
{
tS32 desiredCutoffExponent = -(tS32)cutoffNumber;
if (desiredCutoffExponent > cutoffExponent)
cutoffExponent = desiredCutoffExponent;
}
break;
}
// Output the exponent of the first digit we will print
*pOutExponent = digitExponent-1;
// In preparation for calling BigInt_DivideWithRemainder_MaxQuotient9(),
// we need to scale up our values such that the highest block of the denominator
// is greater than or equal to 8. We also need to guarantee that the numerator
// can never have a length greater than the denominator after each loop iteration.
// This requires the highest block of the denominator to be less than or equal to
// 429496729 which is the highest number that can be multiplied by 10 without
// overflowing to a new block.
RJ_ASSERT( scale.GetLength() > 0 );
tU32 hiBlock = scale.GetBlock( scale.GetLength() - 1 );
if (hiBlock < 8 || hiBlock > 429496729)
{
// Perform a bit shift on all values to get the highest block of the denominator into
// the range [8,429496729]. We are more likely to make accurate quotient estimations
// in BigInt_DivideWithRemainder_MaxQuotient9() with higher denominator values so
// we shift the denominator to place the highest bit at index 27 of the highest block.
// This is safe because (2^28 - 1) = 268435455 which is less than 429496729. This means
// that all values with a highest bit at index 27 are within range.
tU32 hiBlockLog2 = LogBase2(hiBlock);
RJ_ASSERT(hiBlockLog2 < 3 || hiBlockLog2 > 27);
tU32 shift = (32 + 27 - hiBlockLog2) % 32;
BigInt_ShiftLeft( &scale, shift );
BigInt_ShiftLeft( &scaledValue, shift);
BigInt_ShiftLeft( &scaledMarginLow, shift);
if (pScaledMarginHigh != &scaledMarginLow)
BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
}
// These values are used to inspect why the print loop terminated so we can properly
// round the final digit.
tB low; // did the value get within marginLow distance from zero
tB high; // did the value get within marginHigh distance from one
tU32 outputDigit; // current digit being output
if (cutoffMode == CutoffMode_Unique)
{
// For the unique cutoff mode, we will try to print until we have reached a level of
// precision that uniquely distinguishes this value from its neighbors. If we run
// out of space in the output buffer, we terminate early.
for (;;)
{
digitExponent = digitExponent-1;
// divide out the scale to extract the digit
outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);
RJ_ASSERT( outputDigit < 10 );
// update the high end of the value
BigInt_Add( &scaledValueHigh, scaledValue, *pScaledMarginHigh );
// stop looping if we are far enough away from our neighboring values
// or if we have reached the cutoff digit
low = BigInt_Compare(scaledValue, scaledMarginLow) < 0;
high = BigInt_Compare(scaledValueHigh, scale) > 0;
if (low | high | (digitExponent == cutoffExponent))
break;
// store the output digit
*pCurDigit = (tC8)('0' + outputDigit);
++pCurDigit;
// multiply larger by the output base
BigInt_Multiply10( &scaledValue );
BigInt_Multiply10( &scaledMarginLow );
if (pScaledMarginHigh != &scaledMarginLow)
BigInt_Multiply2( pScaledMarginHigh, scaledMarginLow );
}
}
else
{
// For length based cutoff modes, we will try to print until we
// have exhausted all precision (i.e. all remaining digits are zeros) or
// until we reach the desired cutoff digit.
low = false;
high = false;
for (;;)
{
digitExponent = digitExponent-1;
// divide out the scale to extract the digit
outputDigit = BigInt_DivideWithRemainder_MaxQuotient9(&scaledValue, scale);
RJ_ASSERT( outputDigit < 10 );
if ( scaledValue.IsZero() | (digitExponent == cutoffExponent) )
break;
// store the output digit
*pCurDigit = (tC8)('0' + outputDigit);
++pCurDigit;
// multiply larger by the output base
BigInt_Multiply10(&scaledValue);
}
}
// round off the final digit
// default to rounding down if value got too close to 0
tB roundDown = low;
// if it is legal to round up and down
if (low == high)
{
// round to the closest digit by comparing value with 0.5. To do this we need to convert
// the inequality to large integer values.
// compare( value, 0.5 )
// compare( scale * value, scale * 0.5 )
// compare( 2 * scale * value, scale )
BigInt_Multiply2(&scaledValue);
tS32 compare = BigInt_Compare(scaledValue, scale);
roundDown = compare < 0;
// if we are directly in the middle, round towards the even digit (i.e. IEEE rouding rules)
if (compare == 0)
roundDown = (outputDigit & 1) == 0;
}
// print the rounded digit
if (roundDown)
{
*pCurDigit = (tC8)('0' + outputDigit);
++pCurDigit;
}
else
{
// handle rounding up
if (outputDigit == 9)
{
// find the first non-nine prior digit
for (;;)
{
// if we are at the first digit
if (pCurDigit == pOutBuffer)
{
// output 1 at the next highest exponent
*pCurDigit = '1';
++pCurDigit;
*pOutExponent += 1;
break;
}
--pCurDigit;
if (*pCurDigit != '9')
{
// increment the digit
*pCurDigit += 1;
++pCurDigit;
break;
}
}
}
else
{
// values in the range [0,8] can perform a simple round up
*pCurDigit = (tC8)('0' + outputDigit + 1);
++pCurDigit;
}
}
// return the number of digits output
RJ_ASSERT(pCurDigit - pOutBuffer <= (tPtrDiff)bufferSize);
return pCurDigit - pOutBuffer;
}