/** This class provides mechanisms for
 * efficiently formatting numbers and Strings.
 * Data is appended to existing byte arrays. Note
 * that the formatting of real or double values
 * may differ slightly (in the last bit) from
 * the standard Java packages since these routines
 * are optimized for speed rather than accuracy.
 * <p>
 * The methods in this class create no objects.
 * <p>
 * If a number cannot fit into the requested space
 * the truncateOnOverflow flag controls whether the
 * formatter will attempt to append it using the
 * available length in the output (a la C or Perl style
 * formats). If this flag is set, or if the number
 * cannot fit into space left in the buffer it is 'truncated'
 * and the requested space is filled with a truncation fill
 * character. A TruncationException may be thrown if the truncationThrow
 * flag is set.
 * <p>
 * This class does not explicitly support separate methods
 * for formatting reals in exponential notation. Real numbers
 * near one are by default formatted in decimal notation while
 * numbers with large (or very negative) exponents are formatted
 * in exponential notation. By setting the limits at which these
 * transitions take place the user can force either exponential or
 * decimal notation.
 */
32 public final class ByteFormatter {
34 /** Internal buffers used in formatting fields */
35 private byte[] tbuf1 = new byte[32];
36 private byte[] tbuf2 = new byte[32];
37 private static final double ilog10 = 1. / Math.log(10);
38 /** Should we truncate overflows or just run over limit */
39 private boolean truncateOnOverflow = true;
40 /** What do we use to fill when we cannot print the number? */
41 private byte truncationFill = (byte) '*'; // Default is often used in Fortran
42 /** Throw exception on truncations */
43 private boolean truncationThrow = true;
44 /** Should we right align? */
45 private boolean align = false;
46 /** Minimum magnitude to print in non-scientific notation. */
47 double simpleMin = 1.e-3;
48 /** Maximum magnitude to print in non-scientific notation. */
49 double simpleMax = 1.e6;
50 /** Powers of 10. We overextend on both sides.
51 * These should perhaps be tabulated rather than
52 * computed though it may be faster to calculate
53 * them than to read in the extra bytes in the class file.
55 private static final double tenpow[];
56 /** What index of tenpow is 10^0 */
57 private static final int zeropow;
59 static { // Static initializer
61 int min = (int) Math.floor((int) (Math.log(Double.MIN_VALUE) * ilog10));
62 int max = (int) Math.floor((int) (Math.log(Double.MAX_VALUE) * ilog10));
65 tenpow = new double[(max - min) + 1];
68 for (int i = 0; i < tenpow.length; i += 1) {
69 tenpow[i] = Math.pow(10, i + min);
/** Digits. We could handle other bases
 * by extending or truncating this list and changing
 * the division by 10 (and its factors) at various
 * places.
 */
78 private static final byte[] digits = {
79 (byte) '0', (byte) '1', (byte) '2', (byte) '3', (byte) '4',
80 (byte) '5', (byte) '6', (byte) '7', (byte) '8', (byte) '9'};
82 /** Set the truncation behavior.
83 * @param val If set to true (the default) then do not
84 * exceed the requested length. If a number cannot
85 * be sensibly formatted, the truncation fill character
88 public void setTruncateOnOverflow(boolean val) {
89 truncateOnOverflow = val;
92 /** Should truncations cause a truncation overflow? */
93 public void setTruncationThrow(boolean throwException) {
94 truncationThrow = throwException;
97 /** Set the truncation fill character.
98 * @param val The character to be used in subsequent truncations.
100 public void setTruncationFill(char val) {
101 truncationFill = (byte) val;
104 /** Set the alignment flag.
105 * @param val Should numbers be right aligned?
107 public void setAlign(boolean val) {
111 /** Set the range of real numbers that will be formatted in
112 * non-scientific notation, i.e., .00001 rather than 1.0e-5.
113 * The sign of the number is ignored.
114 * @param min The minimum value for non-scientific notation.
115 * @param max The maximum value for non-scientific notation.
117 public void setSimpleRange(double min, double max) {
122 /** Format an int into an array.
123 * @param val The int to be formatted.
124 * @param array The array in which to place the result.
125 * @return The number of characters used.
127 public int format(int val, byte[] array) throws TruncationException {
128 return format(val, array, 0, array.length);
131 /** Format an int into an existing array.
132 * @param val Integer to be formatted
133 * @param buf Buffer in which result is to be stored
134 * @param off Offset within buffer
135 * @param len Maximum length of integer
136 * @return offset of next unused character in input buffer.
138 public int format(int val, byte[] buf,
139 int off, int len) throws TruncationException {
142 if (val == Integer.MIN_VALUE) {
143 if (len > 10 || (!truncateOnOverflow && buf.length - off > 10)) {
144 return format("-2147483648", buf, off, len);
146 truncationFiller(buf, off, len);
151 int pos = Math.abs(val);
153 // First count the number of characters in the result.
154 // Otherwise we need to use an intermediary buffer.
159 while (ndig < 10 && pos >= dmax) {
168 // Truncate if necessary.
169 if ((truncateOnOverflow && ndig > len) || ndig > buf.length - off) {
170 truncationFiller(buf, off, len);
174 // Right justify if requested.
176 off = alignFill(buf, off, len - ndig);
179 // Now insert the actual characters we want -- backwards
180 // We use a do{} while() to handle the case of 0.
186 buf[xoff] = digits[pos % 10];
192 buf[xoff] = (byte) '-';
198 /** Format a long into an array.
199 * @param val The long to be formatted.
200 * @param array The array in which to place the result.
201 * @return The number of characters used.
203 public int format(long val, byte[] array) throws TruncationException {
204 return format(val, array, 0, array.length);
207 /** Format a long into an existing array.
208 * @param val Long to be formatted
209 * @param buf Buffer in which result is to be stored
210 * @param off Offset within buffer
211 * @param len Maximum length of integer
212 * @return offset of next unused character in input buffer.
214 public int format(long val, byte[] buf,
215 int off, int len) throws TruncationException {
218 if (val == Long.MIN_VALUE) {
219 if (len > 19 || (!truncateOnOverflow && buf.length - off > 19)) {
220 return format("-9223372036854775808", buf, off, len);
222 truncationFiller(buf, off, len);
227 long pos = Math.abs(val);
229 // First count the number of characters in the result.
230 // Otherwise we need to use an intermediary buffer.
235 // Might be faster to try to do this partially in ints
236 while (ndig < 19 && pos >= dmax) {
246 // Truncate if necessary.
248 if ((truncateOnOverflow && ndig > len) || ndig > buf.length - off) {
249 truncationFiller(buf, off, len);
253 // Right justify if requested.
255 off = alignFill(buf, off, len - ndig);
258 // Now insert the actual characters we want -- backwards.
264 buf[xoff] = (byte) '0';
265 boolean last = (pos == 0);
269 // Work on ints rather than longs.
271 int giga = (int) (pos % 1000000000L);
276 for (int i = 0; i < 9; i += 1) {
278 buf[xoff] = digits[giga % 10];
281 if (last && giga == 0) {
289 buf[xoff] = (byte) '-';
295 /** Format a boolean into an existing array.
297 public int format(boolean val, byte[] array) {
298 return format(val, array, 0, array.length);
301 /** Format a boolean into an existing array
302 * @param val The boolean to be formatted
303 * @param array The buffer in which to format the data.
304 * @param off The starting offset within the buffer.
305 * @param len The maximum number of characters to use
306 * use in formatting the number.
307 * @return Offset of next available character in buffer.
309 public int format(boolean val, byte[] array, int off,
311 if (align && len > 1) {
312 off = alignFill(array, off, len - 1);
317 array[off] = (byte) 'T';
319 array[off] = (byte) 'F';
326 /** Insert a string at the beginning of an array */
327 public int format(String val, byte[] array) {
328 return format(val, array, 0, array.length);
331 /** Insert a String into an existing character array.
332 * If the String is longer than len, then only the
333 * the initial len characters will be inserted.
334 * @param val The string to be inserted. A null string
335 * will insert len spaces.
336 * @param array The buffer in which to insert the string.
337 * @param off The starting offset to insert the string.
338 * @param len The maximum number of characters to insert.
339 * @return Offset of next available character in buffer.
341 public int format(String val, byte[] array, int off, int len) {
344 for (int i = 0; i < len; i += 1) {
345 array[off + i] = (byte) ' ';
350 int slen = val.length();
352 if ((truncateOnOverflow && slen > len) || (slen > array.length - off)) {
353 val = val.substring(0, len);
357 if (align && (len > slen)) {
358 off = alignFill(array, off, len - slen);
361 /** We should probably require ASCII here, but for the nonce we do not [TAM 5/11] */
362 System.arraycopy(val.getBytes(), 0, array, off, slen);
366 /** Format a float into an array.
367 * @param val The float to be formatted.
368 * @param array The array in which to place the result.
369 * @return The number of characters used.
371 public int format(float val, byte[] array) throws TruncationException {
372 return format(val, array, 0, array.length);
/** Format a float into an existing byte array.
 * <p>
 * This is hard to do exactly right... The JDK code does
 * stuff with rational arithmetic and so forth.
 * We use a much simpler algorithm which may give
 * an answer off in the lowest order bit.
 * Since this is pure Java, it should still be consistent
 * from machine to machine.
 * <p>
 * Recall that the binary representation of
 * the float is of the form <tt>d = 0.bbbbbbbb x 2<sup>n</sup></tt>
 * where there are up to 24 binary digits in the binary
 * fraction (including the assumed leading 1 bit
 * for normalized numbers).
 * We find a value m such that <tt>10<sup>m</sup> d</tt> is between
 * <tt>2<sup>24</sup></tt> and <tt>2<sup>32</sup></tt>.
 * This product will be exactly convertible to an int
 * with no loss of precision. Getting the
 * decimal representation for that is trivial
 * (see {@link #format(int, byte[], int, int)}).
 * This is a decimal mantissa and we have an exponent (<tt>-m</tt>).
 * All we have to do is manipulate the decimal point
 * to where we want to see it. Errors can
 * arise due to roundoff in the scaling multiplication, but
 * should be very small.
 *
 * @param val Float to be formatted
 * @param buf Buffer in which result is to be stored
 * @param off Offset within buffer
 * @param len Maximum length of field
 * @return Offset of next character in buffer.
 */
406 public int format(float val, byte[] buf,
407 int off, int len) throws TruncationException {
409 float pos = (float) Math.abs(val);
415 return format("0.0", buf, off, len);
416 } else if (Float.isNaN(val)) {
417 return format("NaN", buf, off, len);
418 } else if (Float.isInfinite(val)) {
420 return format("Infinity", buf, off, len);
422 return format("-Infinity", buf, off, len);
426 int power = (int) Math.floor((Math.log(pos) * ilog10));
427 int shift = 8 - power;
431 // Scale the number so that we get a number ~ n x 10^8.
433 scale = (float) tenpow[shift + zeropow];
435 // Can get overflow if the original number is
436 // very small, so we break out the shift
437 // into two multipliers.
438 scale2 = (float) tenpow[30 + zeropow];
439 scale = (float) tenpow[shift - 30 + zeropow];
443 pos = (pos * scale) * scale2;
445 // Parse the float bits.
447 int bits = Float.floatToIntBits(pos);
449 // The exponent should be a little more than 23
450 int exp = ((bits & 0x7F800000) >> 23) - 127;
452 int numb = (bits & 0x007FFFFF);
456 numb |= (0x00800000);
463 // Multiply this number by 2 raised to the excess of the exponent
464 // over 23. This completes the conversion of float to int
465 // (<<= did not work on Alpha TruUnix)
467 numb = numb << (exp - 23L);
469 // Get a decimal mantissa.
470 boolean oldAlign = align;
472 int ndig = format(numb, tbuf1, 0, 32);
476 // Now format the float.
478 return combineReal(val, buf, off, len, tbuf1, ndig, shift);
481 /** Format a double into an array.
482 * @param val The double to be formatted.
483 * @param array The array in which to place the result.
484 * @return The number of characters used.
486 public int format(double val, byte[] array) throws TruncationException {
487 return format(val, array, 0, array.length);
/** Format a double into an existing byte array.
 * <p>
 * This is hard to do exactly right... The JDK code does
 * stuff with rational arithmetic and so forth.
 * We use a much simpler algorithm which may give
 * an answer off in the lowest order bit.
 * Since this is pure Java, it should still be consistent
 * from machine to machine.
 * <p>
 * Recall that the binary representation of
 * the double is of the form <tt>d = 0.bbbbbbbb x 2<sup>n</sup></tt>
 * where there are up to 53 binary digits in the binary
 * fraction (including the assumed leading 1 bit
 * for normalized numbers).
 * We find a value m such that <tt>10<sup>m</sup> d</tt> is between
 * <tt>2<sup>53</sup></tt> and <tt>2<sup>63</sup></tt>.
 * This product will be exactly convertible to a long
 * with no loss of precision. Getting the
 * decimal representation for that is trivial
 * (see {@link #format(long, byte[], int, int)}).
 * This is a decimal mantissa and we have an exponent (<tt>-m</tt>).
 * All we have to do is manipulate the decimal point
 * to where we want to see it. Errors can
 * arise due to roundoff in the scaling multiplication, but
 * should be no more than a single bit.
 *
 * @param val Double to be formatted
 * @param buf Buffer in which result is to be stored
 * @param off Offset within buffer
 * @param len Maximum length of field
 * @return offset of next unused character in input buffer.
 */
521 public int format(double val, byte[] buf,
522 int off, int len) throws TruncationException {
524 double pos = Math.abs(val);
528 // Special cases -- It is OK if these get truncated.
530 return format("0.0", buf, off, len);
531 } else if (Double.isNaN(val)) {
532 return format("NaN", buf, off, len);
533 } else if (Double.isInfinite(val)) {
535 return format("Infinity", buf, off, len);
537 return format("-Infinity", buf, off, len);
541 int power = (int) (Math.log(pos) * ilog10);
542 int shift = 17 - power;
546 // Scale the number so that we get a number ~ n x 10^17.
548 scale = tenpow[shift + zeropow];
550 // Can get overflow if the original number is
551 // very small, so we break out the shift
552 // into two multipliers.
553 scale2 = tenpow[200 + zeropow];
554 scale = tenpow[shift - 200 + zeropow];
558 pos = (pos * scale) * scale2;
560 // Parse the double bits.
562 long bits = Double.doubleToLongBits(pos);
564 // The exponent should be a little more than 52.
565 int exp = (int) (((bits & 0x7FF0000000000000L) >> 52) - 1023);
567 long numb = (bits & 0x000FFFFFFFFFFFFFL);
571 numb |= (0x0010000000000000L);
578 // Multiply this number by 2 raised to the excess of the exponent
579 // over 52. This completes the conversion of double to long.
580 numb = numb << (exp - 52);
582 // Get a decimal mantissa.
583 boolean oldAlign = align;
585 int ndig = format(numb, tbuf1, 0, 32);
588 // Now format the double.
590 return combineReal(val, buf, off, len, tbuf1, ndig, shift);
593 /** This method formats a double given
594 * a decimal mantissa and exponent information.
595 * @param val The original number
596 * @param buf Output buffer
597 * @param off Offset into buffer
598 * @param len Maximum number of characters to use in buffer.
599 * @param mant A decimal mantissa for the number.
600 * @param lmant The number of characters in the mantissa
601 * @param shift The exponent of the power of 10 that
602 * we shifted val to get the given mantissa.
603 * @return Offset of next available character in buffer.
605 int combineReal(double val, byte[] buf, int off, int len,
606 byte[] mant, int lmant, int shift) throws TruncationException {
608 // First get the minimum size for the number
610 double pos = Math.abs(val);
611 boolean simple = false;
615 if (pos >= simpleMin && pos <= simpleMax) {
619 int exp = lmant - shift - 1;
624 boolean oldAlign = align;
626 lexp = format(exp, tbuf2, 0, 32);
629 minSize = lexp + 2; // e.g., 2e-12
630 maxSize = lexp + lmant + 2; // add in "." and e
633 minSize = exp + 1; // e.g. 32
635 // Special case. E.g., 99.9 has
636 // minimum size of 3.
638 for (i = 0; i < lmant && i <= exp; i += 1) {
639 if (mant[i] != (byte) '9') {
643 if (i > exp && i < lmant && mant[i] >= (byte) '5') {
647 maxSize = lmant + 1; // Add in "."
648 if (maxSize <= minSize) { // Very large numbers.
649 maxSize = minSize + 1;
653 maxSize = 1 + Math.abs(exp) + lmant;
661 // Can the number fit?
662 if ((truncateOnOverflow && minSize > len)
663 || (minSize > buf.length - off)) {
664 truncationFiller(buf, off, len);
668 // Do we need to align it?
669 if (maxSize < len && align) {
670 int nal = len - maxSize;
671 off = alignFill(buf, off, nal);
678 // Now begin filling in the buffer.
680 buf[off] = (byte) '-';
687 return Math.abs(mantissa(mant, lmant, exp, simple, buf, off, len));
689 off = mantissa(mant, lmant, 0, simple, buf, off, len - lexp - 1);
693 // Handle the expanded exponent by filling
694 if (exp == 9 || exp == 99) {
696 if (off + len == minSize) {
697 truncationFiller(buf, off, len);
700 // Steal a character from the mantissa.
705 lexp = format(exp, tbuf2, 0, 32);
707 buf[off] = (byte) 'E';
709 System.arraycopy(tbuf2, 0, buf, off, lexp);
714 /** Write the mantissa of the number. This method addresses
715 * the subtleties involved in rounding numbers.
717 int mantissa(byte[] mant, int lmant, int exp, boolean simple,
718 byte[] buf, int off, int len) {
720 // Save in case we need to extend the number.
725 buf[off] = (byte) '0';
729 buf[off] = (byte) '.';
733 // Leading 0s in small numbers.
735 while (cexp < -1 && len > 0) {
736 buf[off] = (byte) '0';
744 // Print out all digits to the left of the decimal.
745 while (exp >= 0 && pos < lmant) {
746 buf[off] = mant[pos];
752 // Trust we have enough space for this.
753 for (int i = 0; i <= exp; i += 1) {
754 buf[off] = (byte) '0';
759 // Add in a decimal if we have space.
761 buf[off] = (byte) '.';
767 // Now handle the digits to the right of the decimal.
768 while (len > 0 && pos < lmant) {
769 buf[off] = mant[pos];
776 // Now handle rounding.
778 if (pos < lmant && mant[pos] >= (byte) '5') {
781 // Increment to the left until we find a non-9
782 for (i = off - 1; i >= off0; i -= 1) {
785 if (buf[i] == (byte) '.' || buf[i] == (byte) '-') {
788 if (buf[i] == (byte) '9') {
796 // Now we handle 99.99 case. This can cause problems
797 // in two cases. If we are not using scientific notation
798 // then we may want to convert 99.9 to 100., i.e.,
799 // we need to move the decimal point. If there is no
800 // decimal point, then we must not be truncating on overflow
801 // but we should be allowed to write it to the
802 // next character (i.e., we are not at the end of buf).
804 // If we are printing in scientific notation, then we want
805 // to convert 9.99 to 1.00, i.e. we do not move the decimal.
806 // However we need to signal that the exponent should be
807 // incremented by one.
809 // We cannot have aligned the number, since that requires
810 // the full precision number to fit within the requested
811 // length, and we would have printed out the entire
812 // mantissa (i.e., pos >= lmant)
816 buf[off0] = (byte) '1';
817 boolean foundDecimal = false;
818 for (i = off0 + 1; i < off; i += 1) {
819 if (buf[i] == (byte) '.') {
831 if (simple && !foundDecimal) {
832 buf[off + 1] = (byte) '0'; // 99 went to 100
836 off = -off; // Signal to change exponent if necessary.
844 /** Fill the buffer with truncation characters. After filling
845 * the buffer, a TruncationException will be thrown if the
846 * appropriate flag is set.
848 void truncationFiller(byte[] buffer, int offset, int length)
849 throws TruncationException {
851 for (int i = offset; i < offset + length; i += 1) {
852 buffer[i] = truncationFill;
854 if (truncationThrow) {
855 throw new TruncationException();
860 /** Fill the buffer with blanks to align
863 public int alignFill(byte[] buffer, int offset, int len) {
864 for (int i = offset; i < offset + len; i += 1) {
865 buffer[i] = (byte) ' ';