diff --git a/extract_notes.py b/extract_notes.py index 6dabbb0cae21098a90f504e9e6200d4733aa5c21..286800bed4558499cf612dfc71bccd42949d2dee 100755 --- a/extract_notes.py +++ b/extract_notes.py @@ -6,7 +6,7 @@ import os, sys, os.path import getpass import sqlite3 import json -import bisect +#import bisect import re #regular expression parser = argparse.ArgumentParser(description="Extraire les notes d'une liseuse Vivlio") @@ -19,19 +19,34 @@ args = parser.parse_args() def getPage(anchor): output = re.search("page\=(\d+)",anchor) if output is not None: - output = output.group(1) - return int(output) + output = int(output.group(1))+1 + return output + +def startChar(anchor): + output = re.search("offs\=(\d+)",anchor) + if output is not None: + output = int(output.group(1)) + return output + +def endChar(anchor): + output = re.search("over\=(\d+)",anchor) + if output is not None: + output = int(output.group(1)) + return output class Note: BM = 1 #bookmark QUOTE = 2 #citation NOTE = 3 #note - def __init__(self, dbString): + def __init__(self, id, dbString): tmp = json.loads(dbString) self.text = None self.endP = None self.note = None + self.startChar = None + self.endChar = None + self.id = id if len(tmp)==3: self.type = Note.NOTE tmp[0].update(tmp[1]) @@ -47,54 +62,68 @@ class Note: self.p = getPage(tmp[0]["anchor"]) if "begin" in tmp[0] : self.p = getPage(tmp[0]["begin"]) + self.startChar = startChar(tmp[0]["begin"]) if "end" in tmp[0] : self.endP = getPage(tmp[0]["end"]) + self.endChar = endChar(tmp[0]["end"]) if "text" in tmp[0] : self.text = tmp[0]["text"].replace("\n","") + #for comparison + def endP_int(self): + res = self.endP + if res == None: + res=-1 + return res + def startChar_int(self): + res = self.startChar + if res == None: + res=-1 + return res + def endChar_int(self): + res = self.endChar + if res == None: + res=-1 + return res + def __lt__(self, note2): - if self.p > note2.p: - res = -1 - elif self.p < note2.p: - res = 1 - elif self.endP == note2.endP: - if self.type > note2.type: - res = -1 - elif self.type < note2.type: - res = 1 - else: - res = 0 - elif self.endP == None: - res = 1 - elif note2.endP == None: - res = -1 - elif self.endP > note2.endP: - res = -1 + return (self.p, self.startChar_int(), self.endP_int(), self.endChar_int(), self.type, self. id) < (note2.p, note2.startChar_int(), note2.endP_int(), note2.endChar_int(), note2.type, note2. id) + + def getType(self): + if self.type == Note.BM: + res = "Bookmark" + elif self.type == Note.QUOTE: + res = "Quote" else: - res = 1 + res = "Note" return res def toJSON(self): - json_note = {"type":self.type,"p":self.p} + json_note = {"id":self.id, "type":self.getType(),"p":self.p} + if self.startChar!=None: + json_note["startChar"] = self.startChar if self.endP != None : json_note["endP"] = self.endP + if self.endChar!=None: + json_note["endChar"] = self.endChar if self.text != None : json_note["text"] = self.text json_note["raw"] = self.raw return json_note - def toWiki(self): - wiki_note = "" + def toWiki(self, writePageNumber = True): + wiki_note = "<p id='"+self.getType()+"_"+str(self.id)+"'>" if self.type != Note.BM: - wiki_note = "{{p|"+str(self.p) - if self.endP != None and self.p!=self.endP: - wiki_note += "–"+str(self.endP) - wiki_note+="}}\n" + if writePageNumber: + wiki_note += "{{p|"+str(self.p) + if self.endP != None and self.p!=self.endP: + wiki_note += "–"+str(self.endP) + wiki_note+="}}\n" if self.text != None: - wiki_note += "«"+self.text+"»\n" + wiki_note += "«"+self.text+"»" if self.note != None: - wiki_note += "{{commentaire|"+self.note+"}}\n" - return wiki_note + wiki_note += " {{commentaire|"+self.note+"}}" + return wiki_note+"</p>" class Book: def __init__(self, author=None, title=None): @@ -112,7 +141,8 @@ class Book: self.data = [] def addNote(self, note): - bisect.insort_left(self.data,note) + #bisect.insort_left(self.data,note) + self.data.append(note) def fileName(self, directory="."): res = False @@ -128,6 +158,7 @@ class Book: return directory+"/"+res def store(self, directory=".", format="json"): + self.data.sort() if format == "json": self.JSON_store(self.fileName(directory)+".json") elif format == "wiki": @@ -146,9 +177,12 @@ class Book: def wiki_store(self, fileName): out_file = open(fileName, "w") out_file.write("="+self.author+" — "+self.title+"=\n") + last_note = None for note in self.data: - out_file.write(note.toWiki()) + writePageNumber = not isinstance(last_note,Note) or (last_note.p != note.p or last_note.endP != note.endP) + out_file.write(note.toWiki(writePageNumber)) out_file.write("\n") + last_note = note out_file.close() @@ -184,7 +218,7 @@ def connect(sourceDir): return conn def retrieveData(conn): - query = "SELECT `Books`.`Title` as `title`, `Books`.`Authors` as `author`, '['||GROUP_CONCAT(`Tags`.`Val`, ',')||']' as `data` FROM `Books`, `Items`, `Tags` WHERE (`Tags`.`TagID` = 101 OR `Tags`.`TagID` = 104 OR `Tags`.`TagID` = 105) AND `Tags`.`ItemID`= `Items`.`OID` AND `Items`.`ParentID`=`Books`.`OID` GROUP BY `Items`.`OID` ORDER BY `author`, `title`;" + query = "SELECT `Books`.`Title` as `title`, `Books`.`Authors` as `author`, '['||GROUP_CONCAT(`Tags`.`Val`, ',')||']' as `data`, `Tags`.`ItemID` as `id` FROM `Books`, `Items`, `Tags` WHERE (`Tags`.`TagID` = 101 OR `Tags`.`TagID` = 104 OR `Tags`.`TagID` = 105) AND `Tags`.`ItemID`= `Items`.`OID` AND `Items`.`ParentID`=`Books`.`OID` GROUP BY `Items`.`OID` ORDER BY `author`, `title`, `Tags`.`TimeEdt`;" cur = conn.cursor() cur.execute(query) return cur.fetchall() @@ -198,7 +232,7 @@ def export_data(data, directory, format="json"): curbook.store(directory, format) curbook.empty() curbook = Book(comment[1],comment[0]) - curbook.addNote(Note(comment[2])) + curbook.addNote(Note(comment[3],comment[2])) if curbook.is_empty(): curbook.store(directory, format)