1 package nom.tam.util;
\r
3 import java.io.UnsupportedEncodingException;
\r
4 import java.util.logging.Level;
\r
5 import java.util.logging.Logger;
\r
7 /** This class provides routines
\r
8 * for efficient parsing of data stored in a byte array.
\r
9 * This routine is optimized (in theory at least!) for efficiency
\r
10 * rather than accuracy. The values read in for doubles or floats
\r
11 * may differ in the last bit or so from the standard input
\r
12 * utilities, especially in the case where a float is specified
\r
13 * as a very long string of digits (substantially longer than
\r
14 * the precision of the type).
\r
16 * The get methods generally are available with or without a length
\r
17 * parameter specified. When a length parameter is specified only
\r
18 * the bytes within the specified range from the current offset will
\r
19 * be searched for the number. If no length is specified, the entire
\r
20 * buffer from the current offset will be searched.
\r
22 * The getString method returns a string with leading and trailing
\r
23 * white space left intact. For all other get calls, leading
\r
24 * white space is ignored. If fillFields is set, then the get
\r
25 * methods check that only white space follows valid data and a
\r
26 * FormatException is thrown if that is not the case. If
\r
27 * fillFields is not set and valid data is found, then the
\r
28 * methods return having read as much as possible. E.g., for
\r
29 * the sequence "T123.258E13", a getBoolean, getInt and
\r
30 * getFloat call would return true, 123, and 2.58e12 when
\r
31 * called in succession.
\r
34 public class ByteParser {
\r
36 /** Array being parsed */
\r
37 private byte[] input;
\r
38 /** Current offset into input. */
\r
40 /** Length of last parsed value */
\r
41 private int numberLength;
\r
42 /** Did we find a sign last time we checked? */
\r
43 private boolean foundSign;
\r
44 /** Do we fill up fields? */
\r
45 private boolean fillFields = false;
\r
47 /** Construct a parser.
\r
48 * @param input The byte array to be parsed.
\r
49 * Note that the array can be re-used by
\r
50 * refilling its contents and resetting the offset.
\r
52 public ByteParser(byte[] input) {
\r
57 /** Set the buffer for the parser */
\r
58 public void setBuffer(byte[] buf) {
\r
63 /** Get the buffer being used by the parser */
\r
64 public byte[] getBuffer() {
\r
68 /** Set the offset into the array.
\r
69 * @param offset The desired offset from the beginning
\r
72 public void setOffset(int offset) {
\r
73 this.offset = offset;
\r
76 /** Do we require a field to completely fill up the specified
\r
77 * length (with optional leading and trailing white space)?
\r
78 @param flag Is filling required?
\r
80 public void setFillFields(boolean flag) {
\r
84 /** Get the current offset
\r
85 @return The current offset within the buffer.
\r
87 public int getOffset() {
\r
91 /** Get the number of characters used to parse the previous
\r
92 * number (or the length of the previous String returned).
\r
94 public int getNumberLength() {
\r
95 return numberLength;
\r
98 /** Read in the buffer until a double is read. This will read
\r
99 * the entire buffer if fillFields is set.
\r
100 * @return The value found.
\r
102 public double getDouble() throws FormatException {
\r
103 return getDouble(input.length - offset);
\r
106 /** Look for a double in the buffer.
\r
107 * Leading spaces are ignored.
\r
108 * @param length The maximum number of characters
\r
109 * used to parse this number. If fillFields
\r
110 * is specified then only whitespace may follow
\r
111 * a valid double value.
\r
113 public double getDouble(int length) throws FormatException {
\r
115 int startOffset = offset;
\r
117 boolean error = true;
\r
122 // Skip initial blanks.
\r
123 length -= skipWhite(length);
\r
126 numberLength = offset - startOffset;
\r
130 double mantissaSign = checkSign();
\r
135 // Look for the special strings NaN, Infinity and Inf (in either letter case).
\r
137 && (input[offset] == 'n' || input[offset] == 'N')
\r
138 && (input[offset + 1] == 'a' || input[offset + 1] == 'A')
\r
139 && (input[offset + 2] == 'n' || input[offset + 2] == 'N')) {
\r
141 number = Double.NaN;
\r
145 // Look for the longer string first then try the shorter.
\r
146 } else if (length >= 8
\r
147 && (input[offset] == 'i' || input[offset] == 'I')
\r
148 && (input[offset + 1] == 'n' || input[offset + 1] == 'N')
\r
149 && (input[offset + 2] == 'f' || input[offset + 2] == 'F')
\r
150 && (input[offset + 3] == 'i' || input[offset + 3] == 'I')
\r
151 && (input[offset + 4] == 'n' || input[offset + 4] == 'N')
\r
152 && (input[offset + 5] == 'i' || input[offset + 5] == 'I')
\r
153 && (input[offset + 6] == 't' || input[offset + 6] == 'T')
\r
154 && (input[offset + 7] == 'y' || input[offset + 7] == 'Y')) {
\r
155 number = Double.POSITIVE_INFINITY;
\r
159 } else if (length >= 3
\r
160 && (input[offset] == 'i' || input[offset] == 'I')
\r
161 && (input[offset + 1] == 'n' || input[offset + 1] == 'N')
\r
162 && (input[offset + 2] == 'f' || input[offset + 2] == 'F')) {
\r
163 number = Double.POSITIVE_INFINITY;
\r
169 number = getBareInteger(length); // This will update offset
\r
170 length -= numberLength; // Set by getBareInteger
\r
172 if (numberLength > 0) {
\r
176 // Check for fractional values after decimal
\r
177 if (length > 0 && input[offset] == '.') {
\r
182 double numerator = getBareInteger(length);
\r
183 if (numerator > 0) {
\r
184 number += numerator / Math.pow(10., numberLength);
\r
186 length -= numberLength;
\r
187 if (numberLength > 0) {
\r
193 offset = startOffset;
\r
195 throw new FormatException("Invalid real field");
\r
198 // Look for an exponent
\r
201 // Our Fortran heritage means that we allow 'D' for the exponent indicator.
\r
202 if (input[offset] == 'e' || input[offset] == 'E'
\r
203 || input[offset] == 'd' || input[offset] == 'D') {
\r
208 int sign = checkSign();
\r
213 int exponent = (int) getBareInteger(length);
\r
215 // For very small numbers we try to minimize
\r
216 // effects of denormalization.
\r
217 if (exponent * sign > -300) {
\r
218 number *= Math.pow(10., exponent * sign);
\r
220 number = 1.e-300 * (number * Math.pow(10., exponent * sign + 300));
\r
222 length -= numberLength;
\r
228 if (fillFields && length > 0) {
\r
230 if (isWhite(length)) {
\r
234 offset = startOffset;
\r
235 throw new FormatException("Non-blanks following real.");
\r
239 numberLength = offset - startOffset;
\r
240 return mantissaSign * number;
\r
243 /** Get a floating point value from the buffer. (See getDouble(int).)
\r
245 public float getFloat() throws FormatException {
\r
246 return (float) getDouble(input.length - offset);
\r
249 /** Get a floating point value in a region of the buffer */
\r
250 public float getFloat(int length) throws FormatException {
\r
251 return (float) getDouble(length);
\r
254 /** Convert a region of the buffer to an integer */
\r
255 public int getInt(int length) throws FormatException {
\r
256 int startOffset = offset;
\r
258 length -= skipWhite(length);
\r
260 numberLength = offset - startOffset;
\r
265 boolean error = true;
\r
267 int sign = checkSign();
\r
272 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
\r
273 number = number * 10 + input[offset] - '0';
\r
281 offset = startOffset;
\r
282 throw new FormatException("Invalid Integer");
\r
285 if (length > 0 && fillFields) {
\r
286 if (isWhite(length)) {
\r
290 offset = startOffset;
\r
291 throw new FormatException("Non-white following integer");
\r
295 numberLength = offset - startOffset;
\r
296 return sign * number;
\r
299 /** Look for an integer between the current offset and the end of the buffer */
\r
300 public int getInt() throws FormatException {
\r
301 return getInt(input.length - offset);
\r
304 /** Look for a long in a specified region of the buffer */
\r
305 public long getLong(int length) throws FormatException {
\r
307 int startOffset = offset;
\r
309 // Skip white space.
\r
310 length -= skipWhite(length);
\r
312 numberLength = offset - startOffset;
\r
317 boolean error = true;
\r
319 long sign = checkSign();
\r
324 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
\r
325 number = number * 10 + input[offset] - '0';
\r
333 offset = startOffset;
\r
334 throw new FormatException("Invalid long number");
\r
337 if (length > 0 && fillFields) {
\r
338 if (isWhite(length)) {
\r
341 offset = startOffset;
\r
343 throw new FormatException("Non-white following long");
\r
346 numberLength = offset - startOffset;
\r
347 return sign * number;
\r
351 * @param length The length of the string.
\r
353 public String getString(int length) {
\r
355 String s = AsciiFuncs.asciiString(input, offset, length);
\r
357 numberLength = length;
\r
361 /** Get a boolean value between the current offset and the end of the buffer */
\r
362 public boolean getBoolean() throws FormatException {
\r
363 return getBoolean(input.length - offset);
\r
366 /** Get a boolean value from a specified region of the buffer */
\r
367 public boolean getBoolean(int length) throws FormatException {
\r
369 int startOffset = offset;
\r
370 length -= skipWhite(length);
\r
372 throw new FormatException("Blank boolean field");
\r
375 boolean value = false;
\r
376 if (input[offset] == 'T' || input[offset] == 't') {
\r
378 } else if (input[offset] != 'F' && input[offset] != 'f') {
\r
380 offset = startOffset;
\r
381 throw new FormatException("Invalid boolean value");
\r
386 if (fillFields && length > 0) {
\r
387 if (isWhite(length)) {
\r
391 offset = startOffset;
\r
392 throw new FormatException("Non-white following boolean");
\r
395 numberLength = offset - startOffset;
\r
399 /** Skip bytes in the buffer */
\r
400 public void skip(int nBytes) {
\r
404 /** Get the integer value starting at the current position.
\r
405 * This routine returns a double rather than an int/long
\r
406 * to enable it to read very long integers (with reduced
\r
407 * precision) such as 111111111111111111111111111111111111111111.
\r
408 * Note that this routine does set numberLength.
\r
410 * @param length The maximum number of characters to use.
\r
412 private double getBareInteger(int length) {
\r
414 int startOffset = offset;
\r
417 while (length > 0 && input[offset] >= '0' && input[offset] <= '9') {
\r
420 number += input[offset] - '0';
\r
424 numberLength = offset - startOffset;
\r
428 /** Skip white space. This routine skips white space in
\r
429 * the input and returns the number of characters skipped.
\r
430 * White space is defined as ' ', '\t', '\n' or '\r'
\r
432 * @param length The maximum number of characters to skip.
\r
434 public int skipWhite(int length) {
\r
437 for (i = 0; i < length; i += 1) {
\r
438 if (input[offset + i] != ' ' && input[offset + i] != '\t'
\r
439 && input[offset + i] != '\n' && input[offset + i] != '\r') {
\r
449 /** Find the sign for a number.
\r
450 * This routine looks for a sign (+/-) at the current location
\r
451 * and returns +1/-1 if one is found, or +1 if not.
\r
452 * The foundSign boolean is set if a sign is found and offset is
\r
455 private int checkSign() {
\r
459 if (input[offset] == '+') {
\r
463 } else if (input[offset] == '-') {
\r
472 /** Is a region blank?
\r
473 * @param length The length of the region to be tested
\r
475 private boolean isWhite(int length) {
\r
476 int oldOffset = offset;
\r
477 boolean value = skipWhite(length) == length;
\r
478 offset = oldOffset;
\r