Skip to content
Snippets Groups Projects
class.R 23.32 KiB
library(R6)
library(httr)
library(stringr)


#'Mediawikir: write R output to mediawiki
#'
#'@docType class
#'@importFrom R6 R6Class
#'@importFrom httr POST
#'@importFrom httr stop_for_status
#'@importFrom httr upload_file
#'@importFrom httr content
#'@importFrom stringr str_replace
#'@export
#'@keywords mediawiki
#'@description A Mediawikir controller serves to connect to a mediawiki API as a registered user in order to edit automatically certain documents based on R scripts
#'@format An \code{\link{R6Class}} generator object
#'@return Object of \code{\link{R6Class}} with methods to interact with mediawiki instance
#'@field base_url the mediawiki instance base url
#'@field api the mediawiki instance API base url
#'@field auth_token the current token used to edit the wiki (cf. \href{https://www.mediawiki.org/wiki/Manual:Edit_token}{Mediawiki API documentation})
#'@field page the page (name or id) which will be modified (including its namespace, if the name is provided)
#'@field content the future content of \code{page}, which will be incrementally constructed using object methods
#'@field user_name the user to credit for the changes
#'@examples
#'  mwHandler <- Mediawikir$new("http://my.wiki.org", "reg_user", "mypassword")
#'  mwHandler$setPage("wikiPage")
#'  mwHandler$addContent("This is an introduction to Mediawikir")
#'  mwHandler$addContent("First step","h2")
#'  mwHandler$addContent("First step of the first step","h3")
#'  fileName <- mwHandler$uploadFile("path/toFile.png","My beautiful image.png")
#'  mwHandler$addContent(paste("[[",fileName,"]]",sep=""))
#'  mwHandler$postContent()
#'  mwHandler$replaceSingleLineTemplateField("attribute", "newValue")
#'@section Methods:
#'\describe{
#'  \item{\code{initialize(instance_url, user, pass)}}{Creates a new \pkg{Mediawikir} object out of:
#'  \describe{
#'    \item{\code{instance_url}}{The base url to the mediawiki instance}
#'    \item{\code{user}}{The screen name of the user who will be credited for the changes in the wiki}
#'    \item{\code{pass}}{The user's password}
#'    \item{\code{dir}}{In case of \href{https://www.mediawiki.org/wiki/Manual:Short_URL}{short Url configuration},
#'    the base url for mediawiki is not the actual directory containing the api and index.php files. In these cases,
#'    the user should specify the installation directory of mediawiki from the base_url.}}}
#'  \item{\code{setPage(pageNameOrId)}}{To set the page for the next post
#'  \describe{
#'    \item{\code{pageNameOrId}}{The name (or id) of the wiki page which contains the changes}
#'  }}
#'  \item{\code{connect(pass)}}{Should not be called directly (see \code{initialize} for detail)}
#'  \item{\code{resetContent(baseContent="")}}{To reset the content of the page for the next post
#'  \describe{
#'    \item{\code{baseContent}}{The wikitext base for the next post}
#'  }}
#'  \item{\code{addContent(theContent, type="p")}}{To add content that will later on be posted to mediawiki. This is where structuring elements are provided
#'  \describe{
#'    \item{\code{theContent}}{Either text (using mediawiki syntax) or path to a file}
#'    \item{\code{type}}{\describe{
#'      \item{\code{p} → A plain paragraph.}{}
#'      \item{\code{h2} → Level 2 title}{}
#'      \item{\code{h3} → Level 3 title}{}
#'      \item{\code{h4} → Level 4 title}{}
#'      \item{\code{h5} → Level 5 title}{}
#'      \item{\code{raw} → No editing of the content}{}
#'    }}
#'  }}
#'  \item{\code{postContent(comment)}}{To replace the content of the \code{page} (attribute) with \code{content} (attribute).
#'  Parameter \code{comment} specifies the description of the modification, default is \code{“R generated content”}}
#'  \item{\code{uploadFile(file_path, file_name)}}{To upload a file to the server (will overwrite existing files)
#'  \describe{
#'    \item{\code{file_path}}{Path to the file to upload to the server}
#'    \item{\code{file_name}}{Name under which to store it}
#'    \item{Returns the title of the wiki page containing the file or \code{FALSE} on error}{}
#'  }}
#'  \item{\code{getContent()}}{Returns the \emph{wikitext} of the selected page (cf. \code{setPage})}
#'  \item{\code{replaceSingleLineTemplateField(template, field, newValue)}}{To replace a field of a template in the selected \code{page}.
#'
#'         This method does not work with numbered parameters. If the sought field does not exist, adds it at the beginning of the template.
#'
#'         Right now it works only for pages that only contain one occurence of the parameter (it does not take outer context into account)
#'
#'         Additionnally, this function is not made for templates the value of which spans accross multiple lines.
#'  \describe{
#'    \item{\code{template}}{The template to look for}
#'    \item{\code{field}}{The name of the field concerned}
#'    \item{\code{newValue}}{The new value of the field}
#'  }}
#'}
Mediawikir <- R6Class("Mediawikir",
 public = list(
   base_url = NULL,
   api = NULL,
   user_name = NULL,
   auth_token = NULL,
   page = "",
   content = "",
   connect = function(pass){
     token <- 1
     "une super méthode"
     tryCatch(
       { #get login token
         query <- httr::POST(self$api,
                             body = list(action="query",
                                         meta="tokens",
                                         type="login",
                                         lgname = self$user_name,
                                         format= "json"))
         httr::stop_for_status(query)
         #parse query response to retrieve token
         response <- httr::content(query, "parsed", "application/json")
         token <- response$query$tokens$logintoken
         #login query
         query <- httr::POST(self$api,
                             body = list(action="clientlogin",
                                         username = self$user_name,
                                         password = pass,
                                         logintoken = token,
                                         loginreturnurl = self$base_url,
                                         format= "json"))
         httr::stop_for_status(query)
         #parse query response to make sure login worked
         response <- httr::content(query, "parsed", "application/json")
         if(response$clientlogin$status!="PASS"){
           stop(paste(response$clientlogin,"\n"))
         }
       },
       error = function(e)
       {
         print(paste("Could not login",self$user_name,"to", self$api, e$message))
         token <- 0
       }
     )
     return(token)
   },

   ######
   # initialize
   ######
   initialize = function (instance_url, user, pass, dir=""){
     tryCatch({
       if(dir !=""){
         self$extra_sep <- paste(instance_url,"/",dir ,sep="")
       }
       else{
         self$base_url <- instance_url
       }
       self$api <- paste(self$base_url,"/","api.php",sep="")
       self$user_name <- user
       self$auth_token <- self$connect(pass)
     },
     error = function(e){
       print(e$message)
       self <- FALSE
     })
   },

   ######
   # setPage
   ######
   setPage = function(pageNameOrId){
     self$page <- pageNameOrId
   },

   ######
   # resetContent
   ######
   resetContent = function(aContent=""){
     self$content <- aContent
   },

   ######
   # addContent
   ######
   addContent = function(theContent, type="p"){
     switch(type,
       h2 = theContent <- paste("==", theContent, "=="),
       h3 = theContent <- paste("===", theContent, "==="),
       h4 = theContent <- paste("====", theContent, "===="),
       h5 = theContent <- paste("=====", theContent, "====="),
       p  = theContent <- paste(theContent, "\n", sep="")
     )
     if(type != "raw"){
       self$content <- paste(self$content, theContent, sep="\n")
     }
     else{
       self$content <- paste(self$content, theContent, sep="")
     }
   },

   ######
   # postContent
   ######
   postContent = function(comment="R generated content"){
     success <- TRUE
     tryCatch(
       { #Verify that a page has been selected
         if(self$page == ""){
           warning("No page to write to, use “setPage” method before calling “postContent”.")
         }
         else{
           if(is.character(self$page)){
             fields <- list(action  = "query",
                            prop    = "info|revisions",
                            meta    = "tokens",
                            rvprop  = "timestamp",
                            titles  = self$page,
                            format  = "json")
           }
           else{
             fields <- list(action  = "query",
                            prop    = "info|revisions",
                            meta    = "tokens",
                            rvprop  = "timestamp",
                            pageids  = self$page,
                            format  = "json")
           }
           #get edit token
           query <- httr::POST(self$api,
                               body = fields)
           httr::stop_for_status(query)
           response <- httr::content(query, "parsed", "application/json")
           #perform edit
           if(is.character(self$page)){
             fields <- list(action        = "edit",
                            title         = self$page,
                            #basetimestamp = "TODO",
                            summary       = comment,
                            format        = "json",
                            text          = self$content,
                            token         = response$query$tokens$csrftoken)
           }
           else{
             fields <- list(action   = "edit",
                            pageid   = self$page,
                            #basetimestamp   = "TODO",
                            summary  = comment,
                            format   = "json",
                            text     = self$content,
                            token    = response$query$tokens$csrftoken)
           }
           query <- httr::POST(self$api,
                               body = fields)
           httr::stop_for_status(query)
           response <- httr::content(query, "parsed", "application/json")
           if(response$edit$result != "Success"){
             stop(paste("Failed to edit ",self$page,"\n",response))
           }
         }
       },
       error = function(e){
         print(e$message)
         success <- FALSE
       })
     return(success)
   },

   ######
   # getContent
   ######
   getContent = function(){
     success <- TRUE
     tryCatch(
       { #Verify that a page has been selected
         if(self$page == ""){
           warning("No page to update to, use “setPage” method before calling “getContent”.")
         }
         else{
           if(is.character(self$page)){
             fields <- list(action  = "raw",
                            title  = self$page)
           }
           else{
             fields <- list(action  = "raw",
                            curid = self$page)
           }
           #get edit token
           query <- httr::POST(paste(self$base_url,"index.php",sep="/"),
                               body = fields)
           httr::stop_for_status(query)
           response <- httr::content(query)
           success <- response
         }
       },
       error = function(e){
         print(e$message)
         success <- FALSE
       })
     return(success)
   },

   ######
   #' Replaces a field of a template in the selected \code{page}.
   #'
   #' This method does not work with numbered parameters. If the sought field does not exist, adds it at the beginning of the template.
   #'
   #' Right now it works only for pages that only contain one occurence of the parameter (it does not take outer context into account)
   #'
   #' Additionnally, this function is not made for templates the value of which spans accross multiple lines.
   #' @param field The name of the field concerned
   #' @param newValue The new value of the field
   #' @return A message logging operations
   #' @examples
   #' obj <- Mediawikir$new("http://wiki.lezinter.net", "user","pass","installDir")
   #' obj$setPage("Title of the page to modify (or its ID)")
   #' obj$replaceSingleLineTemplateField("attribute","new value")
   #' #Will replace |attribute=* by |attribute=new value (or create it)
   ######
   replaceSingleLineTemplateField = function(field="", newValue=""){
     success <- self$getContent()
     if(success != FALSE){
       search <- paste("\\|", field, "\\=[^\\|\\}\\n]*", sep="")
       replace <- paste("|", field, "=",newValue, sep="")
       formerText <- success
       newText <- str_replace(formerText, search, replace)
       if(newText == formerText){
         #The parameter does not exist, adding it as a first parameter
         success <- paste("Could not find field “", field, "”. Adding it as a first parameter → ", sep="")
         newText <- str_replace(formerText, "(\\{\\{[^\\|\\n]*)(\\s|\\n)*\\|", paste("\\1\n  |", field, "=",newValue,"\n  |",sep=""))
       }
       else{
         success <- ""
       }
       if(newText != formerText){
         self$resetContent()
         self$addContent(newText, "raw")
         success <- paste(success,self$postContent(paste(field," → ", newValue, sep="")))
       }
       else{
        success <- paste(success,"No modification of ",self$page," while trying to set “",field,"” to “",newValue,"”.",sep="")
       }
     }
     return(success)
   },

   ######
   # uploadFile
   ######
   uploadFile = function(file_path, file_name){
     success <- TRUE
     tryCatch(
       { #get edit token
         query <- httr::POST(self$api,
                             body = list(action  = "query",
                                         prop    = "info|revisions",
                                         meta    = "tokens",
                                         rvprop  = "timestamp",
                                         titles  = paste("file:",file_name,sep=""),
                                         format  = "json"))
         httr::stop_for_status(query)
         response <- httr::content(query, "parsed", "application/json")
         #perform edit
         the_file <- upload_file(file_path)
         query <- httr::POST(self$api,
                             body = list(action   = "upload",
                                         filename = file_name,
                                         comment  = "R generated content",
                                         format   = "json",
                                         ignorewarnings = 1,
                                         file     = the_file,
                                         token    = response$query$tokens$csrftoken,
                                         ignorewarnings = 1),
                             encode = "multipart")
         httr::stop_for_status(query)
         response <- httr::content(query, "parsed", "application/json")
         if(response$upload$result != "Success"){
           stop(paste("Failed to upload ",file_path,"\n",response))
         }
         else{
           success <- paste("File:",file_name,sep="")
         }
       },
       error = function(e){
         print(e$message)
         success <- FALSE
       })
     return(success)
   }
 )
)

#'Batchator: Batch input to mediawiki based on a csv and a template
#'
#'@docType class
#'@export
#'@keywords mediawiki
#'@description Uses \link{Mediawikir} to create/edit a batch of mediawiki pages
#'@format An \code{\link{R6Class}} generator object
#'@return Object of \code{\link{R6Class}} with methods to interact with mediawiki instance
#'@field author the mediawiki user who will be the author of the pages/modifications
#'@field wiki a mediawikir object
#'@field page_name_pattern a pattern to name the pages generated
#'@field template the content of a text file with the template for the wikitext of the pages to create in a batch
#'@field field_list the list of symbols that will be replaced (the first line of the csv file that will be sent to the Batchator)
#'@field content a matrix of the words that will replace the symbols in the templates (both content and page names — the next lines of the csv file)
#'@examples
#'    bachata <- Batchator$new("http://my.wiki", "user","pass")
#'    bachata$loadTemplate("/path/to/template.wiki")
#'    bachata$loadContent("/path/to/content.csv")
#'    bachata$setPageNamePattern("Project:Media/Sounds/~word~ (~voice_artist~)")
#'    bachata$applyTemplate()
#'    #content of "template.wiki" : {{menuLUCIOLE}}
#'    # {{Sound info
#'    #   |title=~word~ (~voice_artist~)
#'    #   |type=word
#'    #   |text=~word~
#'    #   |language=en
#'    #   |character=~char~
#'    #   |validation=recorded
#'    #   |theme=~theme~}}
#'    #content of "content.csv"
#'    #"~word~","~theme~","~char~","~voice_artist~" ← the field list (see they are used in the templates above)
#'    #"Zero","numbers","Awful Owen","Carlos"
#'    #"One","numbers","Awful Owen","Carlos"
#'    #"One","numbers","Black Betty","Carla"
#'    #3 pages are created in this example…
#'    batchata$modifyWikiTemplateAttributes("list.csv")
#'    #content of "list.csv"
#'    #"Page","Att1","Att2"
#'    #2204,"New value for att1","New value for att2"
#'    #"Help:full/page/Name","New value for att1","New value for att2"
#'@section Methods:
#'\describe{
#'  \item{\code{initialize(wiki,author,pass)}}{Creates a new Batchator object (and its Mediawikir object as an attribute)
#'  \describe{
#'    \item{\code{wiki}}{The base url to the mediawiki instance}
#'    \item{\code{authr}}{The screen name of the user who will be credited for the changes in the wiki}
#'    \item{\code{pass}}{The user's password}
#'    \item{\code{dir}}{The installation directory of mediawiki, if needed}
#'  }}
#'  \item{\code{loadTemplate(file_path)}}{Takes the path of a text file containing a template that will serve as base
#'  for all the pages generated. The template should contain occurrences of the fields defined in the content file.}
#'  \item{\code{setPageNamePattern(naming_pattern)}}{Takes a string of the parttern for the page names that will be generated.
#'  The template should contain occurrences of the fields defined in the content file.}
#'  \item{\code{loadContent(file_path)}}{Takes the path to a csv file that contains both the list of fields used in the templates (1st line)
#'  AND the data for each of those fields (following lines).}
#'  \item{\code{applyTemplate()}}{Applies the templates defined using \code{loadTemplate}, \code{setPageNamePattern} to the data loaded with \code{loadContent}.
#'        It should be noted that if the page exists it is replaced.}
#'  \item{\code{modifyWikiTemplateAttributes(file_path)}}{Reads the csv file pointed by file_path and replaces in each page listed in the Page column the content
#'  first occurence of each field with the corresponding value.
#'
#'  \strong{Warning:} This function is dependant on \code{\link{replaceSingleLineTemplateField}}, which is very basic.}
#'}
#'@importFrom stringr str_replace_all

Batchator <- R6Class("Batchator",
  public = list(
    author = NULL,
    wiki = NULL,
    page_name_pattern=NULL,
    template = NULL,
    field_list = NULL,
    content = NULL,

    ######
    # initialize
    ######
    initialize = function (wiki,author,pass,dir=""){
      tryCatch({
        self$wiki <- Mediawikir$new(wiki, author, pass, dir)
      },
      error = function(e){
        print(e$message)
        self <- FALSE
      })
    },

    ######
    # loadTemplate
    ######
    loadTemplate = function(file_path){
      tryCatch({
        self$template <- readChar(file_path, file.info(file_path)$size)
      },
      error = function(e){
        print(paste(e$message, "No template loaded"))
      })
    },

    ######
    # setPageNamePattern
    ######
    setPageNamePattern = function(naming_pattern){
      self$page_name_pattern = naming_pattern
    },

    ######
    # loadContent
    ######
    loadContent = function(file_path){
      tryCatch({
        csvM <- read.csv(file=file_path, header = FALSE)
        if(ncol(csvM) == 1){
          self$field_list <- as.vector(csvM[1,])
          self$content <- as.vector(csvM[-1,])
        }
        else{
          self$field_list <- as.vector(t(csvM[1,]))
          self$content <- csvM[-1,]
        }
      },
      error = function(e){
        print(paste(e$message, "Batchator:loadContent → No content loaded"))
      })
    },

    ######
    # applyTemplate
    ######
    applyTemplate = function(){
      tryCatch({
        if (is.null(self$content) |
            is.null(self$field_list) |
            is.null(self$page_name_pattern) |
            is.null(self$template)) {
          warning("Either no content or no templates, use loadContent, setPageNamePattern and/or loadTemplate.")
        }
        else{
          if(!is.null(nrow(self$content))){
            for(i in 1:nrow(self$content)){
              self$wiki$resetContent(str_replace_all(self$template,
                                                     setNames(as.vector(t(self$content[i,])),
                                                              self$field_list)))
              self$wiki$setPage(str_replace_all(self$page_name_pattern,
                                                setNames(as.vector(t(self$content[i,])),
                                                         self$field_list)))
              self$wiki$postContent()
            }
          }
          else{
            for(i in 1:length(self$content)){
              self$wiki$resetContent(str_replace_all(self$template,
                                                     setNames(as.vector(t(self$content[i])),
                                                              self$field_list)))
              self$wiki$setPage(str_replace_all(self$page_name_pattern,
                                                setNames(as.vector(t(self$content[i])),
                                                         self$field_list)))
              self$wiki$postContent()
            }
          }
        }
      },
      error = function(e){
        print(e$message)
      })
    },
    ######
    # modifyWikiTemplateAttributes
    ######
    modifyWikiTemplateAttributes = function(file_path){
      tryCatch({
        csvM <- read.csv(file=file_path, header = FALSE)
        if(ncol(csvM) == 1){
          stop(paste("'",file_path,"' only has one column.",sep=""))
        }
        else{
          field_list <- as.vector(t(csvM[1,]))
          if( (field_list[1] != "Page")
              && (field_list[1] != "page")){
            stop(paste("1st column name of '",file_path,"' should be “Page” (",field_list[1],").",sep=""))
          }
          rows <- csvM[-1,]
          for(i in 1:nrow(rows)){
            pageId <- toString(rows[i,1])
            pageIdType <- "page name"
            if(grepl("^[0-9]+$", pageId, perl = T)){
              pageId <- as.integer(pageId)
              pageIdType <- "page ID"
            }
            self$wiki$setPage(pageId)
            print(paste("processing page '",pageId,"' (",pageIdType,")", sep=""))#/**/
            for(j in 2:length(field_list)){
              #self$wiki$addContent("test") #debug
              #test = self$wiki$postContent() #debug
              #print(test) #debug
              success = self$wiki$replaceSingleLineTemplateField(toString(field_list[j]),toString(rows[i,j]))
              print(paste("new value : |",field_list[j],"=",rows[i,j]," → ",success,sep=""))
            }
          }
        }
      },
      error = function(e){
        print(paste(e$message, "Batchator.modifyWikiTemplateAttributes → No content loaded"))
      })

    }
  )
)