/** * */ package com.trendrr.json.stream; import java.io.BufferedReader; import java.io.IOException; import java.io.InputStream; import java.io.InputStreamReader; import java.io.Reader; import java.io.StringReader; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; import com.trendrr.oss.DynMap; import com.trendrr.oss.DynMapFactory; import com.trendrr.oss.FileHelper; import com.trendrr.oss.concurrent.Sleep; import com.trendrr.oss.exceptions.TrendrrException; import com.trendrr.oss.exceptions.TrendrrIOException; import com.trendrr.oss.exceptions.TrendrrParseException; /** * Provides a way to read streams of json dictionaries. * * will start a dictionary on the first '{' and finish the * dict on the corresponding * '}' * * It will ignore any characters between dicts, so any dilimiter can be used. * each dict must be valid, an exception will be thrown on the first parse error. * * * @author Dustin Norlander * @created Nov 2, 2011 * */ public class JSONStreamReader { protected static Log log = LogFactory.getLog(JSONStreamReader.class); Reader reader; long maxBufferedChars = FileHelper.megsToBytes(4); //max size to write before we assume this is an invalid stream. public JSONStreamReader(InputStream stream) { this(new InputStreamReader(stream)); } public JSONStreamReader(Reader reader) { this.reader =new BufferedReader(reader); } public static void main(String ...strings) throws TrendrrException { String json = "blah blah{ \"key\" : \"value\"}{ \"key1\" : \"valu\\\"e1}\"}"; try { json = FileHelper.loadString("tweetparse.json"); } catch (Exception e) { log.error("Caught", e); } JSONStreamReader reader = new JSONStreamReader(new StringReader(json)); DynMap mp; while((mp = reader.readNext()) != null) { System.out.println(mp.toJSONString()); } } /** * returns null on eof. throws exception or returns parsed object. * @return * @throws IOException */ public DynMap readNext() throws TrendrrException { StringBuilder json = new StringBuilder("{"); try { long numRead = 0; int openBrackets = 1; boolean isQuote = false; boolean isEscape = false; //read until the first open bracket int current = this.reader.read(); while(current != '{' && current != -1) { numRead++; if (numRead > this.maxBufferedChars) { throw new TrendrrParseException("Read " + this.maxBufferedChars + " chars without a valid json dict. Beginning with: " + json.substring(0, 256)); } current = this.reader.read(); } do { current = this.reader.read(); if (current == -1) { return null; } char c = (char)current; json.append(c); if (!isQuote) { if (c == '{') { openBrackets++; } else if (c == '}') { openBrackets--; } } if (c == '"' && !isEscape) { isQuote = !isQuote; } if (isQuote && !isEscape && c == '\\') { isEscape = true; } else { isEscape = false; } numRead++; if (numRead > this.maxBufferedChars) { throw new TrendrrParseException("Read " + this.maxBufferedChars + " chars without a valid json dict. Beginning with: " + json.substring(0, 256)); } } while(openBrackets != 0); DynMap dm = DynMapFactory.instanceFromJSON(json.toString()); if (dm == null) { throw new TrendrrParseException("unable to parse json string"); } return dm; } catch (IOException x) { throw new TrendrrIOException(x); } } }