import argparse
import getpass
import json
import os
import os.path
import re  # regular expressions
import sqlite3
import sys
from pathlib import Path

# NOTE(review): getSource() and the `if __name__ == "__main__":` driver are
# only partially visible in the reviewed diff (hunk boundaries cut through
# both), so they are deliberately not restated here.

parser = argparse.ArgumentParser(description="Extraire les notes d'une liseuse Vivlio")
parser.add_argument("-s", "--source", help="le dossier de la liseuse", type=str, default=None)
parser.add_argument("-f", "--format", help="le format d'export (JSON ou wiki)", type=str, default=None)
parser.add_argument("-d", "--destination", default=".", help="le chemin et nom du dossier d'export (un fichier par livre)", type=str)
# Only consume the real command line when executed as a script.  On import
# (tests, reuse as a library) fall back to the declared defaults so that the
# importing program's own argv is never parsed or rejected.
args = parser.parse_args(None if __name__ == "__main__" else [])

# Compiled once: extracts the digits of a "page=<n>" field.
_PAGE_RE = re.compile(r"page=(\d+)")


def getPage(anchor):
    """Return the page number embedded in *anchor*.

    The number is taken from the first ``page=<digits>`` occurrence.

    Returns:
        The page as an ``int``, or ``None`` when *anchor* is not a string or
        contains no ``page=<digits>`` field.
    """
    if not isinstance(anchor, str):
        return None
    match = _PAGE_RE.search(anchor)
    if match is None:
        return None
    return int(match.group(1))


class Annotation:
    """One annotation decoded from the ``data`` column built by retrieveData().

    The payload is a JSON array of objects.  Its shape decides the type:

    * exactly three elements                  -> ``NOTE``
    * second element has exactly one key      -> ``BM`` (bookmark)
    * anything else                           -> ``QUOTE``

    NOTE(review): the classification depends on the position of each element
    in the array; the array order comes from SQL ``GROUP_CONCAT`` -- confirm
    that this order is stable for the reader's database.

    Attributes set on every instance: ``type``, ``raw`` (first element, merged
    with the second one for notes and quotes), ``p`` (start page), ``endP``
    (end page), ``text`` (quoted text) and ``note`` (note text); the last four
    are ``None`` when the payload does not provide them.
    """

    BM = "bookmark"
    QUOTE = "citation"
    NOTE = "note"

    def __init__(self, dbString):
        """Decode *dbString* (JSON array text).

        Raises:
            ValueError: if *dbString* is not valid JSON or is not an array
                with at least two elements.
        """
        parts = json.loads(dbString)
        if not isinstance(parts, list) or len(parts) < 2:
            raise ValueError(
                "annotation payload must be a JSON array with at least two elements"
            )

        raw = parts[0]
        self.text = None
        self.endP = None
        self.note = None  # always defined, whatever the annotation type

        if len(parts) == 3:
            self.type = Annotation.NOTE
            raw.update(parts[1])
            raw["note"] = parts[2]["text"].replace("\n", "")
            self.note = raw["note"]
        elif len(parts[1]) == 1:
            self.type = Annotation.BM
        else:
            raw.update(parts[1])
            self.type = Annotation.QUOTE
            # TODO: mistakes Crayon for a quote, quoting "crayon"

        self.raw = raw
        # "begin" takes precedence over "anchor" for the start page.
        start = raw["begin"] if "begin" in raw else raw.get("anchor")
        self.p = getPage(start)
        if "end" in raw:
            self.endP = getPage(raw["end"])
        if "text" in raw:
            self.text = raw["text"].replace("\n", "")

    def toJSON(self):
        """Return a JSON-serialisable dict describing the annotation.

        ``raw``, ``type`` and ``p`` are always present; ``text`` and ``endP``
        are included only when they are not ``None``.
        """
        json_note = {"raw": self.raw, "type": self.type, "p": self.p}
        if self.text is not None:
            json_note["text"] = self.text
        if self.endP is not None:
            json_note["endP"] = self.endP
        return json_note


def connect(sourceDir):
    """Open ``<sourceDir>/system/config/books.db`` read-only.

    Returns:
        An ``sqlite3.Connection``, or ``None`` (after printing the error) when
        the database cannot be opened.
    """
    path = sourceDir + '/system/config/books.db'
    try:
        # A SQLite "file:" URI needs an absolute, percent-encoded path:
        # otherwise the first segment of a relative path is read as the URI
        # authority and characters such as '#', '?' or '%' truncate/alter it.
        uri = Path(os.path.abspath(path)).as_uri() + "?mode=ro"
        conn = sqlite3.connect(uri, uri=True)
    except (ValueError, sqlite3.Error) as e:
        print(str(e))
        return None
    print("Connected to '" + path + "'.")
    return conn


def retrieveData(conn):
    """Fetch every annotation together with its book's title and author.

    Returns:
        A list of ``(title, author, data)`` rows, one per ``Items`` row,
        ordered by author then title.  ``data`` is ``"[" + values + "]"``
        where *values* are the ``Tags.Val`` entries with TagID 101, 104 or
        105, joined by commas.
    """
    query = "SELECT `Books`.`Title` as `title`, `Books`.`Authors` as `author`, '['||GROUP_CONCAT(`Tags`.`Val`, ',')||']' as `data` FROM `Books`, `Items`, `Tags` WHERE (`Tags`.`TagID` = 101 OR `Tags`.`TagID` = 104 OR `Tags`.`TagID` = 105) AND `Tags`.`ItemID`= `Items`.`OID` AND `Items`.`ParentID`=`Books`.`OID` GROUP BY `Items`.`OID` ORDER BY `author`, `title`;"
    cur = conn.cursor()
    try:
        cur.execute(query)
        return cur.fetchall()
    finally:
        cur.close()


def _fileNamePart(value):
    """Turn a title/author into text that is safe inside a single file name."""
    text = "" if value is None else str(value)
    for separator in ("/", os.sep, os.altsep):
        if separator:
            text = text.replace(separator, "_")
    return text


def dataToJSON(data, directory):
    """Write one ``<author>_<title>.json`` file per book into *directory*.

    Args:
        data: iterable of ``(title, author, annotation_json)`` rows, as
            returned by retrieveData().
        directory: destination folder; created if it does not exist.

    Rows are grouped by target file, so they do not need to be sorted.  Path
    separators inside a title or author are replaced by ``_`` and a missing
    (``None``) title or author contributes an empty name part.

    Raises:
        ValueError: if an annotation payload cannot be decoded.
    """
    books = {}  # file path -> document; dicts keep first-seen order
    for title, author, payload in data:
        fileName = os.path.join(
            directory,
            "{}_{}.json".format(_fileNamePart(author), _fileNamePart(title)),
        )
        book = books.setdefault(
            fileName, {"title": title, "author": author, "data": []}
        )
        book["data"].append(Annotation(payload).toJSON())

    if not books:
        return
    os.makedirs(directory, exist_ok=True)
    for fileName, book in books.items():
        JSON_store(book, fileName)


def JSON_store(data, filename):
    """Serialise *data* to *filename* as indented UTF-8 JSON."""
    # Explicit UTF-8: ensure_ascii=False emits raw non-ASCII characters, which
    # a platform-default encoding may be unable to write.
    with open(filename, "w", encoding="utf-8") as out_file:
        json.dump(data, out_file, indent=2, ensure_ascii=False)