/**
* ISO8601
* Copyright 2011 by Michael Peter Christen
* First released 2.1.2011 at http://yacy.net
*
* $LastChangedDate$
* $LastChangedRevision$
* $LastChangedBy$
*
* This library is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* This library is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public License
* along with this program in the file lgpl21.txt
* If not, see <http://www.gnu.org/licenses/>.
*/
package net.yacy.cora.date;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Calendar;
import java.util.Date;
import java.util.Locale;
import java.util.NoSuchElementException;
import java.util.StringTokenizer;
public class ISO8601Formatter extends AbstractFormatter implements DateFormatter {
/** pattern for a W3C datetime variant of a non-localized ISO8601 date */
private static final String PATTERN_ISO8601 = "yyyy-MM-dd'T'HH:mm:ss'Z'";
/** Date formatter/non-sloppy parser for W3C datetime (ISO8601) in GMT/UTC */
private static final SimpleDateFormat FORMAT_ISO8601 = new SimpleDateFormat(PATTERN_ISO8601, Locale.US);
static {
FORMAT_ISO8601.setTimeZone(AbstractFormatter.UTCtimeZone);
}
public static final ISO8601Formatter FORMATTER = new ISO8601Formatter();
public ISO8601Formatter() {
last_time = 0;
last_format = "";
}
/**
* Parse dates as defined in <a href="http://www.w3.org/TR/NOTE-datetime">http://www.w3.org/TR/NOTE-datetime</a>.
* This format (also specified in ISO8601) allows different "precisions".
* The following lower precision versions for the complete date
* "2007-12-19T10:20:30.567+0300" are allowed:<br>
* "2007"<br>
* "2007-12"<br>
* "2007-12-19"<br>
* "2007-12-19T10:20+0300<br>
* "2007-12-19T10:20:30+0300<br>
* "2007-12-19T10:20:30.567+0300<br>
* Additionally a timezone offset of "+0000" can be substituted as "Z".<br>
* Parsing is done in a fuzzy way. If there is an illegal character somewhere in
* the String, the date parsed so far will be returned, e.g. the input
* "2007-12-19FOO" would return a date that represents "2007-12-19".
*
* @param s
* @return
* @throws ParseException
*/
@Override
public Calendar parse(String s, final int timezoneOffset) throws ParseException {
// do some lazy checks here
s = s.trim();
while (!s.isEmpty() && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
if (s.startsWith("{")) s = s.substring(1);
if (s.endsWith("}")) s = s.substring(0, s.length() - 1);
if (s.startsWith("[")) s = s.substring(1);
if (s.endsWith("]")) s = s.substring(0, s.length() - 1);
while (!s.isEmpty() && (s.charAt(0) > '9' || s.charAt(0) < '0')) s = s.substring(1);
if (s.endsWith("--")) s = s.substring(0, s.length() - 2) + "00";
int p = s.indexOf(';'); if (p >= 0) s = s.substring(0, p); // a semicolon may be used to separate two dates from each other; then we take the first
p = s.indexOf(','); if (p >= 0) s = s.substring(0, p); // a comma may be used to separate two dates from each other; then we take the first
while (!s.isEmpty() && s.endsWith("?")) s = s.substring(0, s.length() - 1); // sometimes used if write is not sure about date
// no go for exact parsing
final Calendar cal = Calendar.getInstance(AbstractFormatter.UTCtimeZone, Locale.US);
cal.clear();
// split 2007-12-19T10:20:30.789+0500 into its parts
// correct: yyyy['-'MM['-'dd['T'HH':'MM[':'ss['.'SSS]]('Z'|ZZZZZ)]]]
final StringTokenizer t = new StringTokenizer(s, "-T:.Z+", true);
if (s == null || t.countTokens() == 0)
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
try {
// year
cal.set(Calendar.YEAR, Integer.parseInt(t.nextToken()));
// month
if (t.nextToken().equals("-")) {
cal.set(Calendar.MONTH, Integer.parseInt(t.nextToken()) - 1);
} else {
return cal;
}
// day
if (t.nextToken().equals("-")) {
cal.set(Calendar.DAY_OF_MONTH, Integer.parseInt(t.nextToken()));
} else {
return cal;
}
// The standard says:
// if there is an hour there has to be a minute and a timezone token, too.
if (t.nextToken().equals("T")) {
final int hour = Integer.parseInt(t.nextToken());
// no error, got hours
int min = 0;
int sec = 0;
int msec = 0;
if (t.nextToken().equals(":")) {
min = Integer.parseInt(t.nextToken());
// no error, got minutes
// need TZ or seconds
String token = t.nextToken();
if (token.equals(":")) {
sec = Integer.parseInt(t.nextToken());
// need millisecs or TZ
token = t.nextToken();
if (token.equals(".")) {
msec = Integer.parseInt(t.nextToken());
// need TZ
token = t.nextToken();
}
}
// check for TZ data
int offset;
if (token.equals("Z")) {
offset = 0;
} else {
int sign = 0;
if (token.equals("+")) {
sign = 1;
} else if (token.equals("-")) {
sign = -1;
} else {
// no legal TZ offset found
return cal;
}
offset = sign * Integer.parseInt(t.nextToken()) * 10 * 3600;
}
cal.set(Calendar.ZONE_OFFSET, offset);
}
cal.set(Calendar.HOUR_OF_DAY, hour);
cal.set(Calendar.MINUTE, min);
cal.set(Calendar.SECOND, sec);
cal.set(Calendar.MILLISECOND, msec);
}
} catch (final NoSuchElementException e) {
// ignore this as it is perfectly fine to have non-complete date in this format
} catch (final Exception e) {
// catch all Exceptions and return what we parsed so far
//serverLog.logInfo("SERVER", "parseISO8601: DATE ERROR with: '" + s + "' got so far: '" + cal.toString());
}
// in case we couldn't even parse a year
if (!cal.isSet(Calendar.YEAR))
throw new ParseException("parseISO8601: Cannot parse '" + s + "'", 0);
return cal;
}
/**
* Creates a String representation of a Date using the format defined
* in ISO8601/W3C datetime
* The result will be in UTC/GMT, e.g. "2007-12-19T10:20:30Z".
*
* @param date The Date instance to transform.
* @return A fixed width (20 chars) ISO8601 date String.
*/
@Override
public final String format(final Date date) {
if (date == null) return "";
if (Math.abs(date.getTime() - last_time) < 1000) return last_format;
synchronized (FORMAT_ISO8601) {
last_format = FORMAT_ISO8601.format(date);
last_time = date.getTime();
}
return last_format;
}
@Override
public final String format() {
long time = System.currentTimeMillis();
if (Math.abs(time - last_time) < 1000) return last_format;
synchronized (FORMAT_ISO8601) {
last_format = FORMAT_ISO8601.format(new Date(time));
last_time = time;
}
return last_format;
}
}