summaryrefslogtreecommitdiff
path: root/include/assimp/fast_atof.h
blob: f2d179d607a85da7c373882d6b8d258392a57164 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
#pragma once

// Copyright (C) 2002-2007 Nikolaus Gebhardt
// This file is part of the "Irrlicht Engine" and the "irrXML" project.
// For conditions of distribution and use, see copyright notice in irrlicht.h and irrXML.h

// ------------------------------------------------------------------------------------
// Original description: (Schrompf)
// Adapted to the ASSIMP library because the builtin atof indeed takes AGES to parse a
// float inside a large string. Before parsing, it does a strlen on the given point.
// Changes:
//  22nd October 08 (Aramis_acg): Added temporary cast to double, added strtoul10_64
//     to ensure long numbers are handled correctly
// ------------------------------------------------------------------------------------

#pragma once
#ifndef FAST_A_TO_F_H_INCLUDED
#define FAST_A_TO_F_H_INCLUDED

#ifdef __GNUC__
#   pragma GCC system_header
#endif

#include <cmath>
#include <limits>
#include <stdint.h>
#include <assimp/defs.h>

#include "StringComparison.h"
#include <assimp/DefaultLogger.hpp>
#include <assimp/Exceptional.h>
#include <assimp/StringUtils.h>

#ifdef _MSC_VER
#  include <stdint.h>
#else
#  include <assimp/Compiler/pstdint.h>
#endif

namespace Assimp {

static constexpr size_t NumItems = 16;

constexpr double fast_atof_table[NumItems] =  {  // we write [16] here instead of [] to work around a swig bug
    0.0,
    0.1,
    0.01,
    0.001,
    0.0001,
    0.00001,
    0.000001,
    0.0000001,
    0.00000001,
    0.000000001,
    0.0000000001,
    0.00000000001,
    0.000000000001,
    0.0000000000001,
    0.00000000000001,
    0.000000000000001
};

// ------------------------------------------------------------------------------------
// Convert a string in decimal format to a number
// ------------------------------------------------------------------------------------
inline unsigned int strtoul10( const char* in, const char** out=0) {
    unsigned int value = 0;

    for ( ;; ) {
        if ( *in < '0' || *in > '9' ) {
            break;
        }

        value = ( value * 10 ) + ( *in - '0' );
        ++in;
    }
    if ( out ) {
        *out = in;
    }
    return value;
}

// ------------------------------------------------------------------------------------
// Convert a string in octal format to a number
// ------------------------------------------------------------------------------------
inline unsigned int strtoul8( const char* in, const char** out=0) {
    unsigned int value( 0 );
    for ( ;; ) {
        if ( *in < '0' || *in > '7' ) {
            break;
        }

        value = ( value << 3 ) + ( *in - '0' );
        ++in;
    }
    if ( out ) {
        *out = in;
    }
    return value;
}

// ------------------------------------------------------------------------------------
// Convert a string in hex format to a number
// ------------------------------------------------------------------------------------
inline unsigned int strtoul16( const char* in, const char** out=0) {
    unsigned int value( 0 );
    for ( ;; ) {
        if ( *in >= '0' && *in <= '9' ) {
            value = ( value << 4u ) + ( *in - '0' );
        } else if (*in >= 'A' && *in <= 'F') {
            value = ( value << 4u ) + ( *in - 'A' ) + 10;
        } else if (*in >= 'a' && *in <= 'f') {
            value = ( value << 4u ) + ( *in - 'a' ) + 10;
        } else {
            break;
        }
        ++in;
    }
    if ( out ) {
        *out = in;
    }
    return value;
}

// ------------------------------------------------------------------------------------
// Convert just one hex digit
// Return value is UINT_MAX if the input character is not a hex digit.
// ------------------------------------------------------------------------------------
inline unsigned int HexDigitToDecimal(char in) {
    unsigned int out( UINT_MAX );
    if ( in >= '0' && in <= '9' ) {
        out = in - '0';
    } else if ( in >= 'a' && in <= 'f' ) {
        out = 10u + in - 'a';
    } else if ( in >= 'A' && in <= 'F' ) {
        out = 10u + in - 'A';
    }

    // return value is UINT_MAX if the input is not a hex digit
    return out;
}

// ------------------------------------------------------------------------------------
// Convert a hex-encoded octet (2 characters, i.e. df or 1a).
// ------------------------------------------------------------------------------------
inline uint8_t HexOctetToDecimal(const char* in) {
    return ((uint8_t)HexDigitToDecimal(in[0])<<4)+(uint8_t)HexDigitToDecimal(in[1]);
}

// ------------------------------------------------------------------------------------
// signed variant of strtoul10
// ------------------------------------------------------------------------------------
inline int strtol10( const char* in, const char** out=0) {
    bool inv = (*in=='-');
    if ( inv || *in == '+' ) {
        ++in;
    }

    int value = strtoul10(in,out);
    if (inv) {
        if (value < INT_MAX) {
            value = -value;
        } else {
            ASSIMP_LOG_WARN( "Converting the string \"", in, "\" into an inverted value resulted in overflow." );
        }
    }
    return value;
}

// ------------------------------------------------------------------------------------
// Parse a C++-like integer literal - hex and oct prefixes.
// 0xNNNN - hex
// 0NNN   - oct
// NNN    - dec
// ------------------------------------------------------------------------------------
inline unsigned int strtoul_cppstyle( const char* in, const char** out=0) {
    if ('0' == in[0]) {
        return 'x' == in[1] ? strtoul16(in+2,out) : strtoul8(in+1,out);
    }
    return strtoul10(in, out);
}

// ------------------------------------------------------------------------------------
// Special version of the function, providing higher accuracy and safety
// It is mainly used by fast_atof to prevent ugly and unwanted integer overflows.
// ------------------------------------------------------------------------------------
template<typename ExceptionType = DeadlyImportError>
inline uint64_t strtoul10_64( const char* in, const char** out=0, unsigned int* max_inout=0) {
    unsigned int cur = 0;
    uint64_t value = 0;

    if ( *in < '0' || *in > '9' ) {
        // The string is known to be bad, so don't risk printing the whole thing.
        throw ExceptionType("The string \"", ai_str_toprintable(in, (int)strlen(in)), "\" cannot be converted into a value." );
    }

    for ( ;; ) {
        if ( *in < '0' || *in > '9' ) {
            break;
        }

        const uint64_t new_value = ( value * (uint64_t) 10 ) + ( (uint64_t) ( *in - '0' ) );

        // numeric overflow, we rely on you
        if ( new_value < value ) {
            ASSIMP_LOG_WARN( "Converting the string \"", in, "\" into a value resulted in overflow." );
            return 0;
        }

        value = new_value;

        ++in;
        ++cur;

        if (max_inout && *max_inout == cur) {
            if (out) { /* skip to end */
                while ( *in >= '0' && *in <= '9' ) {
                    ++in;
                }
                *out = in;
            }

            return value;
        }
    }
    if ( out ) {
        *out = in;
    }

    if ( max_inout ) {
        *max_inout = cur;
    }

    return value;
}

// ------------------------------------------------------------------------------------
// signed variant of strtoul10_64
// ------------------------------------------------------------------------------------
template<typename ExceptionType = DeadlyImportError>
inline int64_t strtol10_64(const char* in, const char** out = 0, unsigned int* max_inout = 0) {
    bool inv = (*in == '-');
    if ( inv || *in == '+' ) {
        ++in;
    }

    int64_t value = strtoul10_64<ExceptionType>(in, out, max_inout);
    if (inv) {
        value = -value;
    }
    return value;
}

// Number of relevant decimals for floating-point parsing.
#define AI_FAST_ATOF_RELAVANT_DECIMALS 15

// ------------------------------------------------------------------------------------
//! Provides a fast function for converting a string into a float,
//! about 6 times faster than atof in win32.
// If you find any bugs, please send them to me, niko (at) irrlicht3d.org.
// ------------------------------------------------------------------------------------
template<typename Real, typename ExceptionType = DeadlyImportError>
inline const char* fast_atoreal_move(const char* c, Real& out, bool check_comma = true) {
    Real f = 0;

    bool inv = (*c == '-');
    if (inv || *c == '+') {
        ++c;
    }

    if ((c[0] == 'N' || c[0] == 'n') && ASSIMP_strincmp(c, "nan", 3) == 0) {
        out = std::numeric_limits<Real>::quiet_NaN();
        c += 3;
        return c;
    }

    if ((c[0] == 'I' || c[0] == 'i') && ASSIMP_strincmp(c, "inf", 3) == 0) {
        out = std::numeric_limits<Real>::infinity();
        if (inv) {
            out = -out;
        }
        c += 3;
        if ((c[0] == 'I' || c[0] == 'i') && ASSIMP_strincmp(c, "inity", 5) == 0) {
            c += 5;
        }
        return c;
     }

    if (!(c[0] >= '0' && c[0] <= '9') &&
            !((c[0] == '.' || (check_comma && c[0] == ',')) && c[1] >= '0' && c[1] <= '9')) {
        // The string is known to be bad, so don't risk printing the whole thing.
        throw ExceptionType("Cannot parse string \"", ai_str_toprintable(c, (int)strlen(c)),
                                    "\" as a real number: does not start with digit "
                                    "or decimal point followed by digit.");
    }

    if (*c != '.' && (! check_comma || c[0] != ',')) {
        f = static_cast<Real>( strtoul10_64<ExceptionType> ( c, &c) );
    }

    if ((*c == '.' || (check_comma && c[0] == ',')) && c[1] >= '0' && c[1] <= '9') {
        ++c;

        // NOTE: The original implementation is highly inaccurate here. The precision of a single
        // IEEE 754 float is not high enough, everything behind the 6th digit tends to be more
        // inaccurate than it would need to be. Casting to double seems to solve the problem.
        // strtol_64 is used to prevent integer overflow.

        // Another fix: this tends to become 0 for long numbers if we don't limit the maximum
        // number of digits to be read. AI_FAST_ATOF_RELAVANT_DECIMALS can be a value between
        // 1 and 15.
        unsigned int diff = AI_FAST_ATOF_RELAVANT_DECIMALS;
        double pl = static_cast<double>( strtoul10_64<ExceptionType> ( c, &c, &diff ));

        pl *= fast_atof_table[diff];
        f += static_cast<Real>( pl );
    }
    // For backwards compatibility: eat trailing dots, but not trailing commas.
    else if (*c == '.') {
        ++c;
    }

    // A major 'E' must be allowed. Necessary for proper reading of some DXF files.
    // Thanks to Zhao Lei to point out that this if() must be outside the if (*c == '.' ..)
    if (*c == 'e' || *c == 'E') {
        ++c;
        const bool einv = (*c=='-');
        if (einv || *c=='+') {
            ++c;
        }

        // The reason float constants are used here is that we've seen cases where compilers
        // would perform such casts on compile-time constants at runtime, which would be
        // bad considering how frequently fast_atoreal_move<float> is called in Assimp.
        Real exp = static_cast<Real>( strtoul10_64<ExceptionType>(c, &c) );
        if (einv) {
            exp = -exp;
        }
        f *= std::pow(static_cast<Real>(10.0), exp);
    }

    if (inv) {
        f = -f;
    }
    out = f;
    return c;
}

// ------------------------------------------------------------------------------------
// The same but more human.
template<typename ExceptionType = DeadlyImportError>
inline ai_real fast_atof(const char* c) {
    ai_real ret(0.0);
    fast_atoreal_move<ai_real, ExceptionType>(c, ret);

    return ret;
}

template<typename ExceptionType = DeadlyImportError>
inline
ai_real fast_atof( const char* c, const char** cout) {
    ai_real ret(0.0);
    *cout = fast_atoreal_move<ai_real, ExceptionType>(c, ret);

    return ret;
}

template<typename ExceptionType = DeadlyImportError>
inline ai_real fast_atof( const char** inout) {
    ai_real ret(0.0);
    *inout = fast_atoreal_move<ai_real, ExceptionType>(*inout, ret);

    return ret;
}

} //! namespace Assimp

#endif // FAST_A_TO_F_H_INCLUDED