package org.wikibrain.wikidata; import org.wikibrain.core.model.NameSpace; import org.wikibrain.core.model.RawPage; import org.wikibrain.core.model.Title; import org.wikidata.wdtk.dumpfiles.MwRevision; import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.Locale; import java.util.TimeZone; /** * Connects a WikiBrain raw page to a WikidataToolkit MwRevision * * @author Shilad Sen */ public class RawPageWrapper implements MwRevision { private final RawPage raw; public RawPageWrapper(RawPage raw) { this.raw = raw; } /** * <p> * Returns the title string of the revised page, including namespace * prefixes and subpages, if any. The string is formatted as it would be on * an HTML page and not as in the URL used by MediaWiki for the page. For * example, spaces are represented as spaces and not as underscores. For * example * </p> * <p> * On a single MediaWiki site, the prefixed page title is a key for a page * at any given moment. However, users may change the title and namespace by * moving pages. The page id provides a better clue to identify pages across * history. * </p> * * @return title string */ @Override public String getPrefixedTitle() { Title t = raw.getTitle(); if (raw.getNamespace() == NameSpace.WIKIPEDIA) { return t.getTitleStringWithoutNamespace(); } else { return t.getNamespaceString() + ":" + t.getTitleStringWithoutNamespace(); } } /** * <p> * Returns the title string of the revised page without any namespace * prefixes. The string is formatted as it would be on an HTML page and not * as in the URL used by MediaWiki for the page. For example, spaces are * represented as spaces and not as underscores. For example * </p> * <p> * On a single MediaWiki site, the combination of page title and page * namespace is a key for a page at any given moment. However, users may * change the title and namespace by moving pages. The page id provides a * better clue to identify pages across history. * </p> * * @return title string */ @Override public String getTitle() { return raw.getTitle().getTitleStringWithoutNamespace(); } /** * <p> * Returns the id of the MediaWiki namespace of the revised page. The * meaning of this id depends on the configuration of the site that the page * is from. Usually, 0 is the main namespace. Even ids usually refer to * normal article pages while their odd successors represent the * corresponding talk namespace. * </p> * <p> * On a single MediaWiki site, the combination of page title and page * namespace is a key for a page at any given moment. However, users may * change the title and namespace by moving pages. The page id provides a * better clue to identify pages across history. * </p> * * @return integer namespace id */ @Override public int getNamespace() { return raw.getNamespace().getValue(); } /** * Returns the numeric page id of the revised page. For any given MediaWiki * site, pages are uniquely identified by their page id. MediaWiki will try * to preserve the page id even across title changes (moves). * * @return integer page id */ @Override public int getPageId() { return raw.getLocalId(); } /** * Returns the numeric id of the current revision. For any given MediaWiki * site, revisions are uniquely identified by their revision id. In * particular, two distinct revisions can never have the same id, even if * they belong to different pages. * * @return long revision id */ @Override public long getRevisionId() { return raw.getRevisionId(); } /** * Returns the time stamp at which the current revision was made. The time * stamp is a string that is formatted according to ISO 8601, such as * "2014-02-19T23:34:16Z". * * @return time stamp string */ @Override public String getTimeStamp() { return getISO8601StringForDate(raw.getLastEdit()); } /** * Returns the text content of the current revision. Traditionally, this is * a wiki text that is edited by users. More recently, however, other * formats, such as JSON, have been introduced by extensions like Wikibase. * The format of the text is specified by {@link #getFormat()}. To interpret * it properly, one should also know the content model, obtained from * {@link #getModel()}. * * @return text content of the revision */ @Override public String getText() { return raw.getBody(); } /** * Returns the content model of the revision. This specifies how the text * content should be interpreted. Content models are usually configured for * namespaces and thus remain rather stable across the history of a page. * However, a page could in principle change its content model over time and * every revision therefore specifies its own content model. All known * models require a single format, obtained from {@link #getFormat()}. * * @return content model as a string */ @Override public String getModel() { return raw.getModel(); } /** * Returns the format of the revision text. This string should be formatted * as a MIME media type. Typical examples are "application/json" (JSON) and * "text/x-wiki" (MediaWiki wikitext). To interpret the meaning of this * format, one should also consider the content model obtained by * {@link #getModel()}. Like the content model, the format might change * between revisions of a page, but this is very rare in practice. * * @return MIME type for revision text */ @Override public String getFormat() { return raw.getFormat(); } /** * Returns the comment string that was used for making the edit that led to * this revision. * * @return comment string */ @Override public String getComment() { return "Fake"; } /** * Returns the name for the contributor that made the edit that led to this * revision. This might be a user name or an IP address. This can be checked * using {@link #hasRegisteredContributor()}. * * @return contributor name or IP address */ @Override public String getContributor() { return "fake"; } /** * Returns the user id of the contributor who made the edit that led to this * revision, or -1 if the edit was not made by a registered user. * * @return user id or -1 for anonymous users */ @Override public int getContributorId() { return -1; } /** * Returns true if the contributor who made the edit that led to this * revision was logged in with a user account. False is returned if the * contributor was not logged in (in which case there is only an IP * address). * * @return true if the contributor was looged in */ @Override public boolean hasRegisteredContributor() { return false; } /** * Return an ISO 8601 combined date and time string for specified date/time * * From: https://gist.github.com/6124652.git * * @param date * Date * @return String with format "yyyy-MM-dd'T'HH:mm:ss'Z'" */ private static String getISO8601StringForDate(Date date) { DateFormat dateFormat = new SimpleDateFormat("yyyy-MM-dd'T'HH:mm:ss'Z'", Locale.US); dateFormat.setTimeZone(TimeZone.getTimeZone("UTC")); return dateFormat.format(date); } }