package org.apache.zeppelin.notebook.repo;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.mongodb.MongoBulkWriteException;
import com.mongodb.MongoClient;
import com.mongodb.MongoClientURI;
import com.mongodb.bulk.BulkWriteError;
import com.mongodb.client.MongoCollection;
import com.mongodb.client.MongoCursor;
import com.mongodb.client.MongoDatabase;
import static com.mongodb.client.model.Filters.eq;
import static com.mongodb.client.model.Filters.type;
import static com.mongodb.client.model.Filters.in;
import com.mongodb.client.model.InsertManyOptions;
import com.mongodb.client.model.UpdateOptions;
import org.apache.zeppelin.conf.ZeppelinConfiguration;
import org.apache.zeppelin.notebook.Note;
import org.apache.zeppelin.notebook.NoteInfo;
import org.apache.zeppelin.notebook.NotebookImportDeserializer;
import org.apache.zeppelin.notebook.Paragraph;
import org.apache.zeppelin.notebook.ApplicationState;
import org.apache.zeppelin.scheduler.Job;
import org.apache.zeppelin.user.AuthenticationInfo;
import org.bson.BsonType;
import org.bson.Document;
import org.bson.types.ObjectId;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import java.io.IOException;
import java.util.*;
import java.util.regex.Matcher;
import java.util.regex.Pattern;
/**
 * Backend for storing notebooks in MongoDB.
 *
 * <p>Each note is stored as one document whose {@code _id} is the note ID. Documents are
 * produced by serializing the {@link Note} to JSON with Gson and parsing that JSON into a
 * BSON {@link Document}; reading goes the opposite way.
 *
 * <p>Revision/checkpoint and settings operations are not supported by this backend; those
 * methods log a warning and return an empty or {@code null} result.
 */
public class MongoNotebookRepo implements NotebookRepo {
  private static final Logger LOG = LoggerFactory.getLogger(MongoNotebookRepo.class);

  /** Regex used to pick a note ID out of a bulk-write error message; compiled once. */
  private static final Pattern NOTE_ID_PATTERN = Pattern.compile("[A-Z0-9]{9}");

  /** Gson instance used to turn stored JSON back into a {@link Note}. */
  private static final Gson NOTE_READER = new GsonBuilder()
      .registerTypeAdapter(Date.class, new NotebookImportDeserializer())
      .create();

  /** Gson instance used to turn a {@link Note} into JSON for storage. */
  private static final Gson NOTE_WRITER = new GsonBuilder().create();

  private final ZeppelinConfiguration conf;
  private final MongoClient mongo;
  private final MongoDatabase db;
  private final MongoCollection<Document> coll;

  /**
   * Connects to MongoDB using the URI, database and collection names from {@code conf} and,
   * when {@code conf.getMongoAutoimport()} is true, imports the local notes.
   *
   * @param conf Zeppelin configuration supplying the MongoDB settings
   * @throws IOException if importing the local notes fails
   */
  public MongoNotebookRepo(ZeppelinConfiguration conf) throws IOException {
    this.conf = conf;
    mongo = new MongoClient(new MongoClientURI(conf.getMongoUri()));
    db = mongo.getDatabase(conf.getMongoDatabase());
    coll = db.getCollection(conf.getMongoCollection());

    if (conf.getMongoAutoimport()) {
      try {
        // import local notes into MongoDB
        insertFileSystemNotes();
      } catch (IOException | RuntimeException e) {
        // The caller never receives this instance, so nobody else could close the client.
        mongo.close();
        throw e;
      }
    }
  }

  /**
   * Inserts the notes found by a {@link VFSNotebookRepo} into MongoDB.
   * Called on startup when auto-import is enabled
   * (environment variable ZEPPELIN_NOTEBOOK_MONGO_AUTOIMPORT).
   * If a note already exists in MongoDB, it is skipped.
   *
   * @throws IOException if the local notes cannot be listed or read
   */
  private void insertFileSystemNotes() throws IOException {
    List<Document> docs = new ArrayList<>(); // docs to be imported
    NotebookRepo vfsRepo = new VFSNotebookRepo(this.conf);
    try {
      // collect notes to be imported
      for (NoteInfo info : vfsRepo.list(null)) {
        Note note = vfsRepo.get(info.getId(), null);
        docs.add(noteToDocument(note));
      }
    } finally {
      vfsRepo.close(); // release the local repo even if listing/reading failed
    }

    // insertMany rejects an empty list, so there is nothing to do without local notes.
    if (docs.isEmpty()) {
      return;
    }

    /*
     * The 'ordered(false)' option lets the bulk insert proceed even though
     * there are duplicated documents. The duplicated documents are skipped
     * and reported with a WARN log.
     */
    try {
      coll.insertMany(docs, new InsertManyOptions().ordered(false));
    } catch (MongoBulkWriteException e) {
      printDuplicatedException(e); // print duplicated document warning log
    }
  }

  /**
   * Logs one warning per write error carried by a {@link MongoBulkWriteException}.
   * When the error message contains something shaped like a note ID, that ID is reported as
   * already existing; otherwise the raw message is logged so the error is not lost.
   *
   * @param e the bulk-write failure raised by the import
   */
  private void printDuplicatedException(MongoBulkWriteException e) {
    for (BulkWriteError error : e.getWriteErrors()) {
      String msg = error.getMessage();
      Matcher matcher = NOTE_ID_PATTERN.matcher(msg);
      if (matcher.find()) { // the message mentions a note ID
        String noteId = matcher.group();
        LOG.warn("Note " + noteId + " not inserted since already exists in MongoDB");
      } else {
        LOG.warn("Bulk insert error without a recognizable note ID: {}", msg);
      }
    }
  }

  /**
   * Lists every note stored in the collection.
   * Documents that still carry an ObjectId {@code _id} are migrated first (see
   * {@link #syncId()}).
   *
   * @param subject ignored by this backend
   * @return info objects for all stored notes; empty when the collection is empty
   */
  @Override
  public List<NoteInfo> list(AuthenticationInfo subject) throws IOException {
    syncId();

    List<NoteInfo> infos = new ArrayList<>();
    // try-with-resources closes the cursor even when a document fails to convert
    try (MongoCursor<Document> cursor = coll.find().iterator()) {
      while (cursor.hasNext()) {
        Note note = documentToNote(cursor.next());
        infos.add(new NoteInfo(note));
      }
    }
    return infos;
  }

  /**
   * Finds documents whose {@code _id} is an ObjectId and re-keys them by note ID.
   * Since updating the {@code _id} field is not allowed, the documents are re-inserted with
   * a string {@code _id} (taken from their {@code id} field) and the originals are removed.
   */
  private void syncId() {
    List<ObjectId> oldDocIds = new ArrayList<>(); // ids of documents to replace
    List<Document> updatedDocs = new ArrayList<>(); // replacement documents

    // Read everything first so the cursor is closed before the collection is modified.
    try (MongoCursor<Document> cursor =
             coll.find(type("_id", BsonType.OBJECT_ID)).iterator()) {
      while (cursor.hasNext()) {
        Document doc = cursor.next();
        // remember the original _id
        oldDocIds.add(doc.getObjectId("_id"));
        // re-key the document with the string note id
        doc.put("_id", doc.getString("id"));
        updatedDocs.add(doc);
      }
    }

    // if there is no such document, exit
    if (updatedDocs.isEmpty()) {
      return;
    }

    // Insert before delete so a failed insert does not lose the original documents.
    coll.insertMany(updatedDocs);
    coll.deleteMany(in("_id", oldDocIds));
  }

  /**
   * Converts a stored document into a {@link Note}.
   * Paragraphs stored as PENDING or RUNNING are marked ABORT, and application states other
   * than ERROR are marked UNLOADED.
   *
   * @param doc document read from the collection
   * @return the deserialized note with the status adjustments applied
   */
  private Note documentToNote(Document doc) {
    // document to JSON, JSON to note
    Note note = NOTE_READER.fromJson(doc.toJson(), Note.class);

    for (Paragraph p : note.getParagraphs()) {
      if (p.getStatus() == Job.Status.PENDING || p.getStatus() == Job.Status.RUNNING) {
        p.setStatus(Job.Status.ABORT);
      }

      List<ApplicationState> appStates = p.getAllApplicationStates();
      if (appStates != null) {
        for (ApplicationState app : appStates) {
          if (app.getStatus() != ApplicationState.Status.ERROR) {
            app.setStatus(ApplicationState.Status.UNLOADED);
          }
        }
      }
    }
    return note;
  }

  /**
   * Converts a {@link Note} into a document keyed by the note ID.
   *
   * @param note note to serialize
   * @return document whose {@code _id} is {@code note.getId()}
   */
  private Document noteToDocument(Note note) {
    // note to JSON, JSON to document
    Document doc = Document.parse(NOTE_WRITER.toJson(note));
    // use the note id as the document id
    doc.put("_id", note.getId());
    return doc;
  }

  /**
   * Loads a single note.
   *
   * @param noteId ID of the note to load
   * @param subject ignored by this backend
   * @return the stored note
   * @throws IOException if no document with that ID exists
   */
  @Override
  public Note get(String noteId, AuthenticationInfo subject) throws IOException {
    Document doc = coll.find(eq("_id", noteId)).first();
    if (doc == null) {
      throw new IOException("Note " + noteId + " not found");
    }
    return documentToNote(doc);
  }

  /**
   * Stores a note, replacing the existing document with the same ID or inserting a new one.
   *
   * @param note note to store
   * @param subject ignored by this backend
   */
  @Override
  public void save(Note note, AuthenticationInfo subject) throws IOException {
    Document doc = noteToDocument(note);
    coll.replaceOne(eq("_id", note.getId()), doc, new UpdateOptions().upsert(true));
  }

  /**
   * Deletes the document whose {@code _id} equals {@code noteId}.
   *
   * @param noteId ID of the note to delete
   * @param subject ignored by this backend
   */
  @Override
  public void remove(String noteId, AuthenticationInfo subject) throws IOException {
    coll.deleteOne(eq("_id", noteId));
  }

  /** Closes the underlying MongoDB client. */
  @Override
  public void close() {
    mongo.close();
  }

  /**
   * Not supported by this backend.
   *
   * @return always {@code Revision.EMPTY}
   */
  @Override
  public Revision checkpoint(String noteId, String checkpointMsg, AuthenticationInfo subject)
      throws IOException {
    // no-op
    LOG.warn("Checkpoint feature isn't supported in {}", this.getClass().toString());
    return Revision.EMPTY;
  }

  /**
   * Not supported by this backend.
   *
   * @return always {@code null}
   */
  @Override
  public Note get(String noteId, String revId, AuthenticationInfo subject) throws IOException {
    LOG.warn("Get note revision feature isn't supported in {}", this.getClass().toString());
    return null;
  }

  /**
   * Not supported by this backend.
   *
   * @return always an empty list
   */
  @Override
  public List<Revision> revisionHistory(String noteId, AuthenticationInfo subject) {
    LOG.warn("Get Note revisions feature isn't supported in {}", this.getClass().toString());
    return Collections.emptyList();
  }

  /**
   * Not supported by this backend.
   *
   * @return always {@code null}
   */
  @Override
  public Note setNoteRevision(String noteId, String revId, AuthenticationInfo subject)
      throws IOException {
    LOG.warn("Set note revision feature isn't supported in {}", this.getClass().toString());
    return null;
  }

  /**
   * Not implemented by this backend.
   *
   * @return always an empty list
   */
  @Override
  public List<NotebookRepoSettingsInfo> getSettings(AuthenticationInfo subject) {
    LOG.warn("Method not implemented");
    return Collections.emptyList();
  }

  /** Not implemented by this backend; the settings are ignored. */
  @Override
  public void updateSettings(Map<String, String> settings, AuthenticationInfo subject) {
    LOG.warn("Method not implemented");
  }
}