/*
Viewer for Khan Academy
Copyright (C) 2012 Concentric Sky, Inc.
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>.
*/
package com.concentricsky.android.khanacademy.util;
import java.io.IOException;
import java.io.InputStream;
import java.net.SocketTimeoutException;
import java.net.URL;
import java.net.URLConnection;
import java.sql.SQLException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.Callable;
import org.apache.http.client.ClientProtocolException;
import android.webkit.WebResourceResponse;
import com.concentricsky.android.khan.R;
import com.concentricsky.android.khanacademy.data.KADataService;
import com.concentricsky.android.khanacademy.data.db.Caption;
import com.fasterxml.jackson.core.JsonParseException;
import com.fasterxml.jackson.core.type.TypeReference;
import com.fasterxml.jackson.databind.JsonMappingException;
import com.fasterxml.jackson.databind.ObjectMapper;
import com.j256.ormlite.dao.Dao;
import com.j256.ormlite.stmt.QueryBuilder;
/**
* Handles retrieving subtitles from the web, caching them, and returning them to the video fragment.
*
* @author austinlally
*
*/
public class CaptionManager {

    /* *********************** STATIC ***************************/

    public static final String LOG_TAG = CaptionManager.class.getSimpleName();

    /** Maximum time, in milliseconds, to wait while connecting to the subtitle server. */
    public static final int CONNECT_TIMEOUT = 5000;

    /**
     * Maximum time, in milliseconds, to wait on a single blocking read from the subtitle server.
     * Without a read timeout a stalled server would block the calling thread indefinitely.
     */
    public static final int READ_TIMEOUT = 10000;

    /** Shared JSON mapper; reused across calls instead of being rebuilt for every response. */
    private static final ObjectMapper JSON_MAPPER = new ObjectMapper();

    /* *********************** PRIVATE ***************************/

    private final KADataService dataService;
    private final String subtitleFormat;

    /* *********************** PUBLIC ***************************/

    /**
     * Get a CaptionManager to manage your captions!
     *
     * @param dataService The data service used to look up the subtitle url format string resource
     *                    and, later, to reach the caption database. A reference to it is kept.
     */
    public CaptionManager(KADataService dataService) {
        this.dataService = dataService;
        this.subtitleFormat = dataService.getString(R.string.url_format_subtitles);
    }

    /**
     * Get a {@link WebResourceResponse} with subtitles for the video with the given youtube id.
     *
     * The response contains a UTF-8 encoded json object with the subtitles received
     * from universalsubtitles.org.
     *
     * @param youtubeId The youtube id of the video whose subtitles we need.
     * @return The {@link WebResourceResponse} with the subtitles, or {@code null} in case of error or if none are found.
     */
    public WebResourceResponse fetchRawCaptionResponse(String youtubeId) {
        Log.d(LOG_TAG, "fetchRawCaptionResponse");
        String youtubeUrl = "http://www.youtube.com/watch?v=" + youtubeId;
        try {
            URL url = new URL(String.format(subtitleFormat, youtubeUrl, "en"));
            URLConnection connection = url.openConnection();
            connection.setConnectTimeout(CONNECT_TIMEOUT);
            // Bound reads as well as the connect, so a stalled server cannot hang the caller forever.
            connection.setReadTimeout(READ_TIMEOUT);
            connection.setUseCaches(true);

            InputStream in = null;
            try {
                in = connection.getInputStream();
            } catch (SocketTimeoutException e) {
                e.printStackTrace();
            } catch (Exception e) {
                // Various exceptions including at least ConnectException and UnknownHostException
                // can happen if we're offline.
                e.printStackTrace();
            }

            if (in == null) {
                return null;
            }
            return new WebResourceResponse("application/json", "UTF-8", in);
        } catch (ClientProtocolException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return null;
    }

    /**
     * Get the captions for a video, from the local database if present, otherwise from the web.
     *
     * Captions fetched from the web are pruned and stored in the database before being returned.
     * This may perform network and database I/O on the calling thread.
     *
     * @param youtubeId The youtube id of the video whose captions we need.
     * @return The captions ordered by {@code sub_order} when served from the database, or the freshly
     *         downloaded captions; {@code null} if none could be loaded or downloaded.
     */
    public List<Caption> getCaptions(String youtubeId) {
        Log.d(LOG_TAG, "getCaptions: " + youtubeId);

        List<Caption> result = null;
        try {
            Dao<Caption, Integer> captionDao = dataService.getHelper().getDao(Caption.class);
            QueryBuilder<Caption, Integer> q = captionDao.queryBuilder();
            q.where().eq("youtube_id", youtubeId);
            q.orderBy("sub_order", true);
            // TODO : Avoid inserting duplicates in the first place, and do a migration to clean up.
            q.groupBy("sub_order");
            result = q.query();
        } catch (SQLException e) {
            e.printStackTrace();
        }

        if (result != null && result.size() > 0) {
            Log.d(LOG_TAG, " already cached; returning");
            return result;
        }

        // If we do not already have the captions, try fetching them.
        // Clients will call this in a background thread, so we can take our time.
        WebResourceResponse response = fetchRawCaptionResponse(youtubeId);
        result = parseAPIResponse(response);
        result = pruneEmptyCaptions(result);
        result = persist(result, youtubeId);
        return result;
    }

    /**
     * Parse the json body of a subtitle response into captions.
     *
     * @param response The response to parse; may be {@code null}.
     * @return The parsed captions, or {@code null} if the response was null, could not be parsed,
     *         or contained a json {@code null}.
     */
    private List<Caption> parseAPIResponse(WebResourceResponse response) {
        Log.d(LOG_TAG, "parseAPIResponse");
        if (response == null) {
            Log.d(LOG_TAG, " response was null");
            return null;
        }

        List<Caption> result = null;
        try {
            result = JSON_MAPPER.readValue(response.getData(), new TypeReference<List<Caption>>(){});
            // A body consisting of the json literal null maps to a null list; do not dereference it.
            if (result != null) {
                Log.d(LOG_TAG, " result length is " + result.size());
            }
        } catch (JsonParseException e) {
            // At 5pm on a Thursday, I encountered this exception. Did a little digging, and the response turned
            // out to contain this:
            // <html> <body> <div style="text-align: center; padding-top: 200px">
            // Amara is currently unavailable for scheduled maintenance. The site will be back shortly.
            // </div> </body> </html>
            // Another at 2:45 Tuesday: Illegal character ((CTRL-CHAR, code 31)): only regular white space (\r, \n, \t) is allowed between tokens
            // This has happened more than once around the same time. Caught it again today, Tue 12/4, at about 3:00.
            // At any rate, these all fall into the "failed to download" category rather than the "none exist" category.
            e.printStackTrace();
        } catch (JsonMappingException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        }
        return result;
    }

    /**
     * Discard caption lists that carry no timing information, and drop individual blank captions.
     *
     * If no caption in the list has a positive start time, the whole list is treated as absent
     * (see the example response at the bottom of this class). Otherwise the given list is modified
     * in place: null entries and captions whose text is null or only whitespace are removed.
     *
     * @param toPrune The captions to prune; may be {@code null}.
     * @return The same list instance after pruning, or {@code null} if the input was null or no
     *         caption had a positive start time.
     */
    private List<Caption> pruneEmptyCaptions(List<Caption> toPrune) {
        Log.d(LOG_TAG, "pruneEmptyCaptions");
        if (toPrune == null) {
            return null;
        }

        // Ensure captions have times. Otherwise, return null and pretend there were no captions.
        boolean hasTimes = false;
        for (Caption c : toPrune) {
            if (c != null && c.getStart_time() > 0) {
                hasTimes = true;
                break;
            }
        }
        if (!hasTimes) {
            return null;
        }

        // Prune individual empty captions. Walking backwards keeps the remaining indices valid
        // while removing by position, and avoids relying on Caption.equals.
        for (int i = toPrune.size() - 1; i >= 0; i--) {
            Caption caption = toPrune.get(i);
            String text = (caption == null) ? null : caption.getText();
            if (text == null || text.trim().length() == 0) {
                toPrune.remove(i);
            }
        }
        return toPrune;
    }

    /**
     * Tag the captions with their video's youtube id and store them in the database.
     *
     * Failures to store are logged and otherwise ignored, so the caller still gets its captions.
     *
     * @param toSave The captions to store; may be {@code null} or empty, in which case nothing is stored.
     * @param youtubeId The youtube id of the video the captions belong to.
     * @return {@code toSave}, unchanged apart from the youtube id set on each caption.
     */
    private List<Caption> persist(final List<Caption> toSave, final String youtubeId) {
        if (toSave == null || toSave.size() == 0) {
            return toSave;
        }

        // Tag first, so the returned captions carry the id even if the database is unavailable.
        for (Caption c : toSave) {
            c.setYoutube_id(youtubeId);
        }

        try {
            final Dao<Caption, Integer> captionDao = dataService.getHelper().getDao(Caption.class);
            if (captionDao != null) {
                // Batching speeds this up significantly.
                captionDao.callBatchTasks(new Callable<Void>() {
                    @Override
                    public Void call() throws Exception {
                        for (Caption c : toSave) {
                            try {
                                captionDao.create(c);
                            } catch (SQLException e) {
                                // Keep going; one failed row should not abort the rest of the batch.
                                e.printStackTrace();
                            }
                        }
                        return null;
                    }
                });
            }
        } catch (SQLException e) {
            e.printStackTrace();
        } catch (Exception e) {
            e.printStackTrace();
        }
        return toSave;
    }

    /* Example of a response that isn't quite what we want.
     * In these cases, we just pretend there are no subtitles at all, as an incomplete list without time stamps isn't that useful.
     * [
    {
    "subtitle_id": "wcpvewparc82371936",
    "text": "What I want to do in this video is show you that some of the things that we've been talking about ",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 1.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "ivddkbpjit82695805",
    "text": "on the last few videos actually do happen, and in particular,",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 2.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "avfjvqzffm83421791",
    "text": "talk about how one of these speculative attacks on a currency can turn into",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 3.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "fmfcpugiut83483196",
    "text": "a banking crisis! This is a chart",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 4.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "otacwxlpyh83540836",
    "text": "from Oxford Economics, and it shows two things:",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 5.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "yyqivgduxz83628118",
    "text": "Thailand's exchange rate and short-term interest rates from the early",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 6.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "rtbbbazapm83760974",
    "text": "1990's to the present, there's a couple of interesting",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 7.0,
    "start_of_paragraph": false
    },
    {
    "subtitle_id": "ccxmomlhbe83829991",
    "text": "things that you might see over here; the first is the exchange rate, you see",
    "start_time": -1,
    "end_time": -1,
    "sub_order": 8.0,
    "start_of_paragraph": false
    }
    ]
    */
}