/*
* Licensed to the Apache Software Foundation (ASF) under one or more
* contributor license agreements. See the NOTICE file distributed with
* this work for additional information regarding copyright ownership.
* The ASF licenses this file to You under the Apache License, Version 2.0
* (the "License"); you may not use this file except in compliance with
* the License. You may obtain a copy of the License at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.apache.zeppelin.notebook.repo;
import java.io.IOException;
import java.lang.reflect.Constructor;
import java.lang.reflect.InvocationTargetException;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Set;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.conf.ZeppelinConfiguration.ConfVars;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
import org.apache.zeppelin.notebook.NotebookAuthorization;
import org.apache.zeppelin.notebook.Paragraph;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import com.google.common.collect.Lists;
/**
* Notebook repository sync with remote storage
*/
public class NotebookRepoSync implements NotebookRepo {
// Logger shared by all instances of this class.
private static final Logger LOG = LoggerFactory.getLogger(NotebookRepoSync.class);
// Upper bound on the number of configured storage classes that get instantiated.
private static final int maxRepoNum = 2;
// Keys of the map produced by notesCheckDiff(): note ids to push to the destination,
// to pull from the destination, and to delete from the destination, respectively.
private static final String pushKey = "pushNoteIds";
private static final String pullKey = "pullNoteIds";
private static final String delDstKey = "delDstNoteIds";
// Configuration handed to the most recent constructor call. The field is static, so it is
// shared by (and overwritten by) every instance that gets constructed.
private static ZeppelinConfiguration config;
// Storage class used when ZEPPELIN_NOTEBOOK_STORAGE is empty or none of the configured
// classes could be instantiated.
private static final String defaultStorage = "org.apache.zeppelin.notebook.repo.GitNotebookRepo";
// Successfully initialized storages, in configuration order. Index 0 is the storage that the
// read operations of this class delegate to.
private List<NotebookRepo> repos = new ArrayList<>();
// When true, sync always pushes source notes that differ and deletes notes that exist only in
// the destination, instead of pulling them.
private final boolean oneWaySync;
/**
 * Creates the synchronizing repository and instantiates its backing storages.
 *
 * <p>The storage class names are read from ZEPPELIN_NOTEBOOK_STORAGE as a comma separated list;
 * when the value is empty the default storage class is used. At most {@link #getMaxRepoNum()}
 * classes are instantiated, each through its public constructor that takes a
 * {@link ZeppelinConfiguration}. A class that cannot be loaded, constructed, or that is not a
 * {@link NotebookRepo} is logged and skipped. If no storage could be initialized at all, the
 * default storage is tried. When more than one storage is available and anonymous access is
 * allowed, an initial sync is run as the anonymous user; a failure of that sync is only logged.
 *
 * @param conf configuration the storage list and flags are read from; it is also passed to
 *     every storage constructor
 */
public NotebookRepoSync(ZeppelinConfiguration conf) {
  config = conf;
  oneWaySync = conf.getBoolean(ConfVars.ZEPPELIN_NOTEBOOK_ONE_WAY_SYNC);
  String allStorageClassNames = conf.getString(ConfVars.ZEPPELIN_NOTEBOOK_STORAGE).trim();
  if (allStorageClassNames.isEmpty()) {
    allStorageClassNames = defaultStorage;
    LOG.warn("Empty ZEPPELIN_NOTEBOOK_STORAGE conf parameter, using default {}", defaultStorage);
  }
  String[] storageClassNames = allStorageClassNames.split(",");
  if (storageClassNames.length > getMaxRepoNum()) {
    LOG.warn("Unsupported number {} of storage classes in ZEPPELIN_NOTEBOOK_STORAGE : {}\n" +
        "first {} will be used", storageClassNames.length, allStorageClassNames, getMaxRepoNum());
  }

  for (int i = 0; i < Math.min(storageClassNames.length, getMaxRepoNum()); i++) {
    // Trim once so that the name that is loaded and the name that is logged are the same.
    String storageClassName = storageClassNames[i].trim();
    try {
      Class<?> notebookStorageClass = Class.forName(storageClassName);
      Constructor<?> constructor = notebookStorageClass.getConstructor(
          ZeppelinConfiguration.class);
      repos.add((NotebookRepo) constructor.newInstance(conf));
    } catch (ClassNotFoundException | NoSuchMethodException | SecurityException |
        InstantiationException | IllegalAccessException | IllegalArgumentException |
        InvocationTargetException | ClassCastException e) {
      // ClassCastException: the configured class exists but does not implement NotebookRepo.
      // Treat it like any other unusable storage class instead of aborting construction.
      LOG.warn("Failed to initialize {} notebook storage class", storageClassName, e);
    }
  }

  // couldn't initialize any storage, use default
  if (getRepoCount() == 0) {
    LOG.info("No storage could be initialized, using default {} storage", defaultStorage);
    initializeDefaultStorage(conf);
  }

  // sync for anonymous mode on start
  if (getRepoCount() > 1 && conf.getBoolean(ConfVars.ZEPPELIN_ANONYMOUS_ALLOWED)) {
    try {
      sync(AuthenticationInfo.ANONYMOUS);
    } catch (IOException e) {
      LOG.error("Couldn't sync on start ", e);
    }
  }
}
/**
 * Instantiates the default storage class and appends it to the list of storages.
 *
 * <p>A failure to load or construct the class is logged and otherwise ignored, so the list of
 * storages may still be empty afterwards.
 *
 * @param conf configuration passed to the storage constructor
 */
private void initializeDefaultStorage(ZeppelinConfiguration conf) {
  try {
    Class<?> notebookStorageClass = Class.forName(defaultStorage);
    Constructor<?> constructor = notebookStorageClass.getConstructor(
        ZeppelinConfiguration.class);
    repos.add((NotebookRepo) constructor.newInstance(conf));
  } catch (ClassNotFoundException | NoSuchMethodException | SecurityException |
      InstantiationException | IllegalAccessException | IllegalArgumentException |
      InvocationTargetException e) {
    // One placeholder only: the trailing throwable is then logged with its stack trace.
    LOG.warn("Failed to initialize {} notebook storage class", defaultStorage, e);
  }
}
/**
 * Describes every initialized storage together with its current settings.
 *
 * @param subject user on whose behalf the settings of each storage are requested
 * @return one entry per storage, in the order the storages were initialized
 */
public List<NotebookRepoWithSettings> getNotebookRepos(AuthenticationInfo subject) {
  List<NotebookRepoWithSettings> described = Lists.newArrayList();
  for (NotebookRepo storage : repos) {
    Class<?> storageClass = storage.getClass();
    described.add(NotebookRepoWithSettings
        .builder(storageClass.getSimpleName())
        .className(storageClass.getName())
        .settings(storage.getSettings(subject))
        .build());
  }
  return described;
}
/**
 * Applies new settings to the storage whose fully qualified class name equals {@code name}.
 *
 * @param name fully qualified class name of the storage to update
 * @param settings settings to hand to the storage
 * @param subject user on whose behalf the update is made
 * @return the updated storage with its settings re-read after the update, or
 *     {@code NotebookRepoWithSettings.EMPTY} when no storage matches {@code name}
 */
public NotebookRepoWithSettings updateNotebookRepo(String name, Map<String, String> settings,
    AuthenticationInfo subject) {
  for (NotebookRepo storage : repos) {
    Class<?> storageClass = storage.getClass();
    if (!storageClass.getName().equals(name)) {
      continue;
    }
    // Only the first matching storage is updated.
    storage.updateSettings(settings, subject);
    return NotebookRepoWithSettings
        .builder(storageClass.getSimpleName())
        .className(storageClass.getName())
        .settings(storage.getSettings(subject))
        .build();
  }
  return NotebookRepoWithSettings.EMPTY;
}
/**
 * Lists the notes of the first storage only.
 *
 * @param subject user on whose behalf the listing is requested
 * @return whatever the first storage reports
 * @throws IOException if no storage is initialized or the first storage fails
 */
@Override
public List<NoteInfo> list(AuthenticationInfo subject) throws IOException {
  NotebookRepo primaryRepo = getRepo(0);
  return primaryRepo.list(subject);
}
/* Lists the notes of the storage at the given index (for tests). */
List<NoteInfo> list(int repoIndex, AuthenticationInfo subject) throws IOException {
  NotebookRepo selectedRepo = getRepo(repoIndex);
  return selectedRepo.list(subject);
}
/**
 * Loads a note from the first storage only.
 *
 * @param noteId id of the note to load
 * @param subject user on whose behalf the note is requested
 * @return whatever the first storage returns for that id
 * @throws IOException if no storage is initialized or the first storage fails
 */
@Override
public Note get(String noteId, AuthenticationInfo subject) throws IOException {
  NotebookRepo primaryRepo = getRepo(0);
  return primaryRepo.get(noteId, subject);
}
/* Loads a note from the storage at the given index (for tests). */
Note get(int repoIndex, String noteId, AuthenticationInfo subject) throws IOException {
  NotebookRepo selectedRepo = getRepo(repoIndex);
  return selectedRepo.get(noteId, subject);
}
/**
 * Saves the note to the first storage and, when present, to the second storage.
 *
 * <p>A failure of the first storage is propagated to the caller. A failure of the second
 * storage is treated as best effort: it is logged together with its cause and not rethrown.
 *
 * @param note note to persist
 * @param subject user on whose behalf the note is saved
 * @throws IOException if no storage is initialized or saving to the first storage fails
 */
@Override
public void save(Note note, AuthenticationInfo subject) throws IOException {
  getRepo(0).save(note, subject);
  if (getRepoCount() > 1) {
    try {
      getRepo(1).save(note, subject);
    } catch (IOException e) {
      // Pass the exception itself so the stack trace and cause chain are not lost.
      LOG.warn("Failed to write to secondary storage", e);
    }
  }
}
/* Saves the note to the storage at the given index only (for tests). */
void save(int repoIndex, Note note, AuthenticationInfo subject) throws IOException {
  NotebookRepo selectedRepo = getRepo(repoIndex);
  selectedRepo.save(note, subject);
}
/**
 * Removes the note from every initialized storage, in storage order.
 *
 * <p>The first storage that fails aborts the loop, so later storages are not touched.
 *
 * @param noteId id of the note to remove
 * @param subject user on whose behalf the note is removed
 * @throws IOException as thrown by the failing storage
 */
@Override
public void remove(String noteId, AuthenticationInfo subject) throws IOException {
  for (int i = 0; i < repos.size(); i++) {
    repos.get(i).remove(noteId, subject);
  }
  /* TODO(khalid): handle case when removing from secondary storage fails */
}
/* Removes the note from the storage at the given index only. */
void remove(int repoIndex, String noteId, AuthenticationInfo subject) throws IOException {
  NotebookRepo selectedRepo = getRepo(repoIndex);
  selectedRepo.remove(noteId, subject);
}
/**
 * Synchronizes two storages: pushes notes from source to destination, pulls notes from
 * destination to source, and deletes notes from the destination, as decided by
 * notesCheckDiff().
 *
 * <p>The source listing is filtered through NotebookAuthorization for the given user; the
 * destination listing is used as returned by the destination storage.
 *
 * @param sourceRepoIndex index of the source storage
 * @param destRepoIndex index of the destination storage
 * @param subject user on whose behalf the sync runs
 * @throws IOException if an index is not initialized, a listing fails, or a delete fails
 */
void sync(int sourceRepoIndex, int destRepoIndex, AuthenticationInfo subject) throws IOException {
  LOG.info("Sync started");
  NotebookAuthorization authorization = NotebookAuthorization.getInstance();
  NotebookRepo source = getRepo(sourceRepoIndex);
  NotebookRepo destination = getRepo(destRepoIndex);

  List<NoteInfo> unfilteredSourceNotes = source.list(subject);
  List<NoteInfo> sourceNotes = authorization.filterByUser(unfilteredSourceNotes, subject);
  List<NoteInfo> destinationNotes = destination.list(subject);

  Map<String, List<String>> plan =
      notesCheckDiff(sourceNotes, source, destinationNotes, destination, subject);
  List<String> toPush = plan.get(pushKey);
  List<String> toPull = plan.get(pullKey);
  List<String> toDelete = plan.get(delDstKey);

  if (toPush.isEmpty()) {
    LOG.info("Nothing to push");
  } else {
    logNoteIds("Notes with the following IDs will be pushed", toPush);
    pushNotes(subject, toPush, source, destination, false);
  }

  if (toPull.isEmpty()) {
    LOG.info("Nothing to pull");
  } else {
    logNoteIds("Notes with the following IDs will be pulled", toPull);
    // Pulling is a push in the opposite direction; permissions are set for pulled notes.
    pushNotes(subject, toPull, destination, source, true);
  }

  if (toDelete.isEmpty()) {
    LOG.info("Nothing to delete from dest");
  } else {
    logNoteIds("Notes with the following IDs will be deleted from dest", toDelete);
    deleteNotes(subject, toDelete, destination);
  }

  LOG.info("Sync ended");
}

/* Logs a headline followed by one line per note id. */
private static void logNoteIds(String headline, List<String> noteIds) {
  LOG.info(headline);
  for (String noteId : noteIds) {
    LOG.info("ID : " + noteId);
  }
}
/**
 * Synchronizes the first storage (as source) with the second storage (as destination).
 *
 * @param subject user on whose behalf the sync runs
 * @throws IOException if either storage is not initialized or the sync itself fails
 */
public void sync(AuthenticationInfo subject) throws IOException {
  final int sourceIndex = 0;
  final int destinationIndex = 1;
  sync(sourceIndex, destinationIndex, subject);
}
/**
 * Copies the given notes from one storage to another, one note at a time.
 *
 * <p>An I/O failure on one note is logged and does not stop the remaining notes.
 *
 * @param subject user on whose behalf notes are read and written
 * @param ids ids of the notes to copy
 * @param localRepo storage the notes are read from
 * @param remoteRepo storage the notes are written to
 * @param setPermissions when true, a copied note that has no owners, readers and writers is
 *     made private to {@code subject}
 */
private void pushNotes(AuthenticationInfo subject, List<String> ids, NotebookRepo localRepo,
    NotebookRepo remoteRepo, boolean setPermissions) {
  for (String noteId : ids) {
    try {
      Note note = localRepo.get(noteId, subject);
      remoteRepo.save(note, subject);
      if (!setPermissions) {
        continue;
      }
      if (emptyNoteAcl(noteId)) {
        makePrivate(noteId, subject);
      }
    } catch (IOException e) {
      LOG.error("Failed to push note to storage, moving onto next one", e);
    }
  }
}
/**
 * Tells whether the note has no owners, no readers and no writers registered.
 *
 * @param noteId id of the note to check
 * @return true only when all three permission sets are empty
 */
private boolean emptyNoteAcl(String noteId) {
  NotebookAuthorization acl = NotebookAuthorization.getInstance();
  if (!acl.getOwners(noteId).isEmpty()) {
    return false;
  }
  if (!acl.getReaders(noteId).isEmpty()) {
    return false;
  }
  return acl.getWriters(noteId).isEmpty();
}
/**
 * Adds the user to the owners, readers and writers of the note.
 *
 * <p>Nothing is changed for an anonymous user; that case is only logged.
 *
 * @param noteId id of the note whose permissions are updated
 * @param subject user to add to all three permission sets
 */
private void makePrivate(String noteId, AuthenticationInfo subject) {
  if (AuthenticationInfo.isAnonymous(subject)) {
    LOG.info("User is anonymous, permissions are not set for pulled notes");
    return;
  }
  NotebookAuthorization acl = NotebookAuthorization.getInstance();

  Set<String> owners = acl.getOwners(noteId);
  owners.add(subject.getUser());
  acl.setOwners(noteId, owners);

  Set<String> readers = acl.getReaders(noteId);
  readers.add(subject.getUser());
  acl.setReaders(noteId, readers);

  Set<String> writers = acl.getWriters(noteId);
  writers.add(subject.getUser());
  acl.setWriters(noteId, writers);
}
/**
 * Removes the given notes from one storage, in list order.
 *
 * @param subject user on whose behalf the notes are removed
 * @param ids ids of the notes to remove
 * @param repo storage to remove the notes from
 * @throws IOException as thrown by the storage; the remaining ids are then not processed
 */
private void deleteNotes(AuthenticationInfo subject, List<String> ids, NotebookRepo repo)
    throws IOException {
  for (int i = 0; i < ids.size(); i++) {
    repo.remove(ids.get(i), subject);
  }
}
/**
 * @return number of storages that were successfully initialized
 */
public int getRepoCount() {
return repos.size();
}
/**
 * @return maximum number of storages this class instantiates and operates on
 */
int getMaxRepoNum() {
return maxRepoNum;
}
/**
 * Returns the storage at the given index.
 *
 * @param repoIndex zero-based index into the initialized storages
 * @return the storage at that index
 * @throws IOException if the index is negative or not below the number of storages
 */
NotebookRepo getRepo(int repoIndex) throws IOException {
  boolean initialized = repoIndex >= 0 && repoIndex < getRepoCount();
  if (initialized) {
    return repos.get(repoIndex);
  }
  throw new IOException("Requested storage index " + repoIndex
      + " isn't initialized," + " repository count is " + getRepoCount());
}
/**
 * Compares the note listings of two storages and sorts note ids into three lists.
 *
 * <ul>
 * <li>A note present in both storages is loaded from both and compared by the latest
 * paragraph date. Equal dates need no action. Otherwise the note is pushed when the source is
 * newer or one-way sync is enabled, and pulled when the destination is newer.</li>
 * <li>A note present only in the source is pushed.</li>
 * <li>A note present only in the destination is deleted from the destination when one-way
 * sync is enabled, and pulled otherwise.</li>
 * </ul>
 *
 * <p>A note that was listed but cannot be loaded is logged and skipped.
 *
 * @param sourceNotes listing of the source storage
 * @param sourceRepo source storage, used to load notes for date comparison
 * @param destNotes listing of the destination storage
 * @param destRepo destination storage, used to load notes for date comparison
 * @param subject user on whose behalf notes are loaded
 * @return map holding the push, pull and delete-from-destination id lists under their keys;
 *     all three lists are always present and may be empty
 */
private Map<String, List<String>> notesCheckDiff(List<NoteInfo> sourceNotes,
    NotebookRepo sourceRepo, List<NoteInfo> destNotes, NotebookRepo destRepo,
    AuthenticationInfo subject) {
  List<String> pushIDs = new ArrayList<>();
  List<String> pullIDs = new ArrayList<>();
  List<String> delDstIDs = new ArrayList<>();

  // Index both listings by id once, so each membership test is a hash lookup instead of a
  // scan over the whole other listing. The first entry for an id wins, as a scan would pick.
  Map<String, NoteInfo> sourceById = new HashMap<>();
  for (NoteInfo note : sourceNotes) {
    if (!sourceById.containsKey(note.getId())) {
      sourceById.put(note.getId(), note);
    }
  }
  Map<String, NoteInfo> destById = new HashMap<>();
  for (NoteInfo note : destNotes) {
    if (!destById.containsKey(note.getId())) {
      destById.put(note.getId(), note);
    }
  }

  for (NoteInfo snote : sourceNotes) {
    String noteId = snote.getId();
    NoteInfo dnote = destById.get(noteId);
    if (dnote == null) {
      /* note exists in source storage, and absent in destination
       * view source as up to date - push
       * (another scenario : note was deleted from destination - not considered)*/
      pushIDs.add(noteId);
      continue;
    }

    /* note exists in source and destination storage systems */
    Date sdate;
    Date ddate;
    try {
      sdate = lastModificationDate(sourceRepo.get(noteId, subject));
      ddate = lastModificationDate(destRepo.get(dnote.getId(), subject));
    } catch (IOException e) {
      LOG.error("Cannot access previously listed note {} from storage ", dnote.getId(), e);
      continue;
    }

    if (sdate.compareTo(ddate) == 0) {
      // Same latest modification date on both sides: nothing to do.
      continue;
    }
    if (sdate.after(ddate) || oneWaySync) {
      /* if source contains more up to date note - push
       * if oneWaySync is enabled, always push no matter who's newer */
      pushIDs.add(noteId);
      LOG.info("Modified note is added to push list : {}", sdate);
    } else {
      /* destination contains more up to date note - pull */
      LOG.info("Modified note is added to pull list : {}", ddate);
      pullIDs.add(noteId);
    }
  }

  for (NoteInfo note : destNotes) {
    String noteId = note.getId();
    if (sourceById.containsKey(noteId)) {
      continue;
    }
    /* note exists in destination storage, and absent in source */
    if (oneWaySync) {
      /* if oneWaySync is enabled, delete the note from destination */
      LOG.info("Extraneous note is added to delete dest list : {}", noteId);
      delDstIDs.add(noteId);
    } else {
      /* if oneWaySync is disabled, pull the note from destination */
      LOG.info("Missing note is added to pull list : {}", noteId);
      pullIDs.add(noteId);
    }
  }

  Map<String, List<String>> map = new HashMap<>();
  map.put(pushKey, pushIDs);
  map.put(pullKey, pullIDs);
  map.put(delDstKey, delDstIDs);
  return map;
}
/**
 * Finds the first note in the list whose id equals the given id.
 *
 * @param notes listing to search
 * @param id note id to look for
 * @return the first matching entry, or null when there is none
 */
private NoteInfo containsID(List<NoteInfo> notes, String id) {
  NoteInfo match = null;
  for (NoteInfo candidate : notes) {
    if (candidate.getId().equals(id)) {
      match = candidate;
      break;
    }
  }
  return match;
}
/**
 * Determines the latest date found among the created, started and finished dates of all
 * paragraphs of the note.
 *
 * @param note note whose paragraphs are inspected
 * @return the latest non-null paragraph date, or the epoch ({@code new Date(0L)}) when no
 *     paragraph date is later than the epoch
 */
private Date lastModificationDate(Note note) {
  Date newest = new Date(0L);
  for (Paragraph paragraph : note.getParagraphs()) {
    Date[] candidates = {
        paragraph.getDateCreated(),
        paragraph.getDateStarted(),
        paragraph.getDateFinished()
    };
    for (Date candidate : candidates) {
      if (candidate == null) {
        continue;
      }
      if (candidate.after(newest)) {
        newest = candidate;
      }
    }
  }
  return newest;
}
/**
 * Closes every initialized storage, in storage order.
 */
@Override
public void close() {
  LOG.info("Closing all notebook storages");
  for (int i = 0; i < repos.size(); i++) {
    repos.get(i).close();
  }
}
/**
 * Creates a checkpoint of the note in every available storage (up to
 * {@link #getMaxRepoNum()} storages).
 *
 * <p>A storage that fails is logged and skipped. The call fails only when every storage
 * failed; the thrown exception then carries one message line per storage and the individual
 * failures as suppressed exceptions.
 *
 * @param noteId id of the note to checkpoint
 * @param checkpointMsg message to attach to the checkpoint
 * @param subject user on whose behalf the checkpoint is created
 * @return the revision reported by the first storage that succeeded; when that revision is
 *     null and another storage also succeeded, the second result is returned instead
 * @throws IOException if the number of failed storages equals the number of storages tried
 */
@Override
public Revision checkpoint(String noteId, String checkpointMsg, AuthenticationInfo subject)
    throws IOException {
  int repoBound = Math.min(getRepoCount(), getMaxRepoNum());
  StringBuilder errorMessage = new StringBuilder();
  List<IOException> failures = new ArrayList<>();
  List<Revision> allRepoCheckpoints = new ArrayList<>();

  for (int i = 0; i < repoBound; i++) {
    NotebookRepo repo = getRepo(i);
    try {
      allRepoCheckpoints.add(repo.checkpoint(noteId, checkpointMsg, subject));
    } catch (IOException e) {
      String repoClass = repo.getClass().toString();
      // The trailing exception argument makes the logger print the stack trace as well.
      LOG.warn("Couldn't checkpoint in {} storage with index {} for note {}",
          repoClass, i, noteId, e);
      errorMessage.append("Error on storage class ").append(repoClass)
          .append(" with index ").append(i)
          .append(" : ").append(e.getMessage()).append("\n");
      failures.add(e);
    }
  }

  // throw exception if failed to commit for all initialized repos
  if (failures.size() == repoBound) {
    IOException allFailed = new IOException(errorMessage.toString());
    for (IOException failure : failures) {
      allFailed.addSuppressed(failure);
    }
    throw allFailed;
  }

  Revision rev = null;
  if (!allRepoCheckpoints.isEmpty()) {
    rev = allRepoCheckpoints.get(0);
    // a storage may report success with a null revision; prefer the next result then
    if (allRepoCheckpoints.size() > 1 && rev == null) {
      rev = allRepoCheckpoints.get(1);
    }
  }
  return rev;
}
/**
 * Loads a specific revision of a note from the first storage.
 *
 * @param noteId id of the note
 * @param revId id of the revision to load
 * @param subject user on whose behalf the revision is requested
 * @return the note at that revision, or null when an I/O error occurred (the error is logged)
 */
@Override
public Note get(String noteId, String revId, AuthenticationInfo subject) {
  try {
    return getRepo(0).get(noteId, revId, subject);
  } catch (IOException e) {
    LOG.error("Failed to get revision {} of note {}", revId, noteId, e);
    return null;
  }
}
/**
 * Lists the revisions of a note as known to the first storage.
 *
 * @param noteId id of the note
 * @param subject user on whose behalf the history is requested
 * @return the history reported by the first storage, or an empty list when an I/O error
 *     occurred (the error is logged)
 */
@Override
public List<Revision> revisionHistory(String noteId, AuthenticationInfo subject) {
  try {
    return getRepo(0).revisionHistory(noteId, subject);
  } catch (IOException e) {
    LOG.error("Failed to list revision history", e);
    return Collections.<Revision>emptyList();
  }
}
/**
 * Reads the settings of the first storage.
 *
 * @param subject user on whose behalf the settings are requested
 * @return the settings reported by the first storage, or an empty list when the first
 *     storage is not available (the error is logged)
 */
@Override
public List<NotebookRepoSettingsInfo> getSettings(AuthenticationInfo subject) {
  try {
    return getRepo(0).getSettings(subject);
  } catch (IOException e) {
    LOG.error("Cannot get notebook repo settings", e);
    return Collections.<NotebookRepoSettingsInfo>emptyList();
  }
}
/**
 * Applies new settings to the first storage.
 *
 * <p>When the first storage is not available the error is logged and nothing is updated.
 *
 * @param settings settings to hand to the first storage
 * @param subject user on whose behalf the update is made
 */
@Override
public void updateSettings(Map<String, String> settings, AuthenticationInfo subject) {
  try {
    NotebookRepo primaryRepo = getRepo(0);
    primaryRepo.updateSettings(settings, subject);
  } catch (IOException e) {
    LOG.error("Cannot update notebook repo settings", e);
  }
}
/**
 * Sets the note to the given revision in every available storage (up to
 * {@link #getMaxRepoNum()} storages).
 *
 * <p>All storages are attempted even after one has succeeded. An I/O failure of a storage is
 * ignored here.
 *
 * @param noteId id of the note
 * @param revId id of the revision to switch to
 * @param subject user on whose behalf the revision is set
 * @return the first non-null note returned by a storage, or null when no storage returned one
 * @throws IOException declared by the interface; failures of individual storages are caught
 */
@Override
public Note setNoteRevision(String noteId, String revId, AuthenticationInfo subject)
    throws IOException {
  int repoBound = Math.min(getRepoCount(), getMaxRepoNum());
  Note firstSuccessful = null;
  for (int i = 0; i < repoBound; i++) {
    Note candidate;
    try {
      candidate = getRepo(i).setNoteRevision(noteId, revId, subject);
    } catch (IOException e) {
      // already logged
      continue;
    }
    // keep only the first successful result; later storages are still updated
    if (firstSuccessful == null && candidate != null) {
      firstSuccessful = candidate;
    }
  }
  return firstSuccessful;
}
}