From 6b1b2be0f19bc44cc5994734dbb4f56a201a7acf Mon Sep 17 00:00:00 2001 From: Mathieu Loiseau <mathieu.loiseau@univ-grenoble-alpes.fr> Date: Sun, 2 May 2021 00:27:25 +0200 Subject: [PATCH] marche presque --- extract_notes.py | 154 +++++++++++++++++++++++++++++++++++++---------- 1 file changed, 123 insertions(+), 31 deletions(-) diff --git a/extract_notes.py b/extract_notes.py index 3096a1c..6dabbb0 100755 --- a/extract_notes.py +++ b/extract_notes.py @@ -6,11 +6,12 @@ import os, sys, os.path import getpass import sqlite3 import json +import bisect import re #regular expression parser = argparse.ArgumentParser(description="Extraire les notes d'une liseuse Vivlio") parser.add_argument("-s", "--source", help="le dossier de la liseuse", type=str, default=None) -parser.add_argument("-f","--format", help="le format d'export (JSON ou wiki)", type=str, default=None) +parser.add_argument("-f","--format", help="le format d'export (JSON ou wiki)", type=str, default="json") parser.add_argument("-d", "--destination", default=".", help="le chemin et nom du dossier d'export (un fichier par livre)", type=str) args = parser.parse_args() @@ -21,25 +22,26 @@ def getPage(anchor): output = output.group(1) return int(output) -class Annotation: - BM = "bookmark" #bookmark - QUOTE = "citation" #citation - NOTE = "note" #note +class Note: + BM = 1 #bookmark + QUOTE = 2 #citation + NOTE = 3 #note def __init__(self, dbString): tmp = json.loads(dbString) self.text = None self.endP = None + self.note = None if len(tmp)==3: - self.type = Annotation.NOTE + self.type = Note.NOTE tmp[0].update(tmp[1]) tmp[0]["note"] = tmp[2]["text"].replace("\n","") self.note = tmp[0]["note"] elif len(tmp[1]) == 1: - self.type = Annotation.BM + self.type = Note.BM else: tmp[0].update(tmp[1]) - self.type = Annotation.QUOTE + self.type = Note.QUOTE #TODO mistakes Crayon for a quote, quoting "crayon" self.raw = tmp[0] self.p = getPage(tmp[0]["anchor"]) @@ -50,14 +52,109 @@ class Annotation: if "text" in tmp[0] : self.text = tmp[0]["text"].replace("\n","") + def __lt__(self, note2): + if self.p > note2.p: + res = -1 + elif self.p < note2.p: + res = 1 + elif self.endP == note2.endP: + if self.type > note2.type: + res = -1 + elif self.type < note2.type: + res = 1 + else: + res = 0 + elif self.endP == None: + res = 1 + elif note2.endP == None: + res = -1 + elif self.endP > note2.endP: + res = -1 + else: + res = 1 + return res + def toJSON(self): - json_note = {"raw":self.raw,"type":self.type,"p":self.p} - if self.text != None : - json_note["text"] = self.text + json_note = {"type":self.type,"p":self.p} if self.endP != None : json_note["endP"] = self.endP + if self.text != None : + json_note["text"] = self.text + json_note["raw"] = self.raw return json_note + def toWiki(self): + wiki_note = "" + if self.type != Note.BM: + wiki_note = "{{p|"+str(self.p) + if self.endP != None and self.p!=self.endP: + wiki_note += "–"+str(self.endP) + wiki_note+="}}\n" + if self.text != None: + wiki_note += "«"+self.text+"»\n" + if self.note != None: + wiki_note += "{{commentaire|"+self.note+"}}\n" + return wiki_note + +class Book: + def __init__(self, author=None, title=None): + self.author = author + self.title = title + self.data = [] + + def equals(self, author, title): + return self.author == author and self.title == title + + def is_empty(self): + return len(self.data)==0 + + def empty(self): + self.data = [] + + def addNote(self, note): + bisect.insort_left(self.data,note) + + def fileName(self, directory="."): + res = False + if self.author == None: + if self.title == None: + raise ValueError("Can't export title and author-less book") + else: + res = self.title + elif self.title == None: + res = self.author + else: + res = self.author+"_"+self.title + return directory+"/"+res + + def store(self, directory=".", format="json"): + if format == "json": + self.JSON_store(self.fileName(directory)+".json") + elif format == "wiki": + self.wiki_store(self.fileName(directory)+".wiki") + else: + raise ValueError("Unexpected format: "+str(format)) + + def JSON_store(self, fileName): + content = {"author":self.author, "title":self.title, "data":[]} + for note in self.data: + content["data"].append(note.toJSON()) + out_file = open(fileName, "w") + json.dump(content, out_file, indent = 2, ensure_ascii=False) + out_file.close() + + def wiki_store(self, fileName): + out_file = open(fileName, "w") + out_file.write("="+self.author+" — "+self.title+"=\n") + for note in self.data: + out_file.write(note.toWiki()) + out_file.write("\n") + out_file.close() + + +##### +# Main functions +##### def getSource(): print("Warning, this only works for linux systems") basepath = "/media/"+getpass.getuser() @@ -81,7 +178,7 @@ def connect(sourceDir): path = sourceDir+'/system/config/books.db' conn = sqlite3.connect("file://"+path+'?mode=ro', uri=True) print("Connected to '"+path+"'.") - except (ValueError,sqlite3.Error) as e: + except (sqlite3.Error) as e: print (str(e)) conn = None return conn @@ -93,25 +190,17 @@ def retrieveData(conn): return cur.fetchall() #while conn.fetchone() -def dataToJSON(data, directory): - curfile = None +def export_data(data, directory, format="json"): + curbook = Book() for comment in data: - fileName = directory+"/"+comment[1]+"_"+comment[0]+".json" - if curfile != fileName: - if curfile != None: - JSON_store(curdata, curfile) - curfile = fileName - curdata = {"title":comment[0],"author":comment[1],"data":[]} - jj = Annotation(comment[2]) - curdata["data"].append(jj.toJSON()) - fileName = None - if curfile != None: - JSON_store(curdata, curfile) - -def JSON_store(data, filename): - out_file = open(filename, "w") - json.dump(data, out_file, indent = 2, ensure_ascii=False) - out_file.close() + if not curbook.equals(comment[1], comment[0]): + if not curbook.is_empty(): + curbook.store(directory, format) + curbook.empty() + curbook = Book(comment[1],comment[0]) + curbook.addNote(Note(comment[2])) + if curbook.is_empty(): + curbook.store(directory, format) #main if __name__ == "__main__": @@ -125,4 +214,7 @@ if __name__ == "__main__": data = retrieveData(conn) conn.close() if data != None: - dataToJSON(data, args.destination) + try: + export_data(data, args.destination, args.format) + except ValueError as e: + print(e) -- GitLab