Predict (#22)

* predict method added to rulelist/set * added dev_mindmap and simplified rule flavoring
talegari · May 25, 2024 · a09aa2e · a09aa2e
1 parent b52adcb
commit a09aa2e
Show file tree

Hide file tree

Showing 19 changed files with 676 additions and 67 deletions.
diff --git a/DESCRIPTION b/DESCRIPTION
@@ -1,7 +1,7 @@
 Package: tidyrules
 Type: Package
 Title: Obtain Rules from Rule Based Models as Tidy Dataframe
-Version: 0.2.0
+Version: 0.2.1
 Authors@R: c(
     person("Srikanth", "Komala Sheshachala", email = "[email protected]", role = c("aut", "cre")),
     person("Amith Kumar", "Ullur Raghavendra", email = "[email protected]", role = c("aut"))
@@ -17,7 +17,7 @@ Imports:
     generics (>= 0.1.3),
     checkmate (>= 2.3.1),
     tidytable (>= 0.11.0),
-    data.table (>= 1.14.6)
+    data.table (>= 1.14.6),
 Suggests:
     AmesHousing (>= 0.0.3),
     dplyr (>= 0.8),
@@ -31,6 +31,7 @@ Suggests:
     mlbench (>= 2.1.1),
     knitr (>= 1.23), 
     rmarkdown (>= 1.13),
+    palmerpenguins (>= 0.1.1),
 Description: Utility to convert text based summary of rule based models to a rulelist or ruleset dataframe (where each row represents a rule) with related metrics such as support, confidence and lift. Rule based models from these packages are supported: 'C5.0', 'rpart' and 'Cubist'.
 URL: https://github.com/talegari/tidyrules
 BugReports: https://github.com/talegari/tidyrules/issues

diff --git a/NAMESPACE b/NAMESPACE
@@ -1,13 +1,17 @@
 # Generated by roxygen2: do not edit by hand
 
+S3method(predict,rulelist)
+S3method(predict,ruleset)
 S3method(print,rulelist)
 S3method(print,ruleset)
 S3method(tidy,C5.0)
 S3method(tidy,cubist)
 S3method(tidy,rpart)
+export(convert_rule_flavor)
 export(tidy)
 export(varSpec)
 importFrom(data.table,":=")
 importFrom(generics,tidy)
 importFrom(magrittr,"%>%")
 importFrom(rlang,"%||%")
+importFrom(utils,data)
diff --git a/R/dev_mindmap.R b/R/dev_mindmap.R
@@ -0,0 +1,44 @@
+################################################################################
+# This is the part of the 'tidyrules' R package hosted at
+# https://github.com/talegari/tidyrules with GPL-3 license.
+################################################################################
+
+# Structure
+#
+# Model/fitted object to rules should happen via 'tidy' call
+# We get the generic from generics::tidy
+# Rules object will be one among: ruleset/rulelist.
+# This is a wrapper over tidytable/dataframe.
+#
+# Methods for rulelist/set: print, predict, augment
+# At high level, predict returns the rule_nbr for a row_nbr in new_data
+# At high level, augment (TODO) returns some metrics on new_data as new column
+#
+# Models:
+#
+# C5
+#   - (rulelist when fitted with rules = TRUE) -- implemented
+#   - (ruleset when fitted with rules = FALSE) -- NOT implemented
+#
+# rpart
+#   - (ruleset with classification aka class) -- implemented
+#   - (ruleset with regression aka anova)     -- implemented
+#   - (ruleset with poisson)                  -- NOT implemented
+#   - (ruleset with survival)                 -- NOT implemented
+#   - (ruleset with exp)                      -- NOT implemented
+#   - (ruleset with user defined split)       -- NOT implemented
+#
+# party
+#   - (ruleset with classification)           -- NOT implemented
+#   - (ruleset with regression)               -- NOT implemented
+#   - (ruleset with survival)                 -- NOT implemented
+#   - (ruleset with user defined split)       -- NOT implemented
+#
+# cubist
+#   - (ruleset with regression)               -- implemented
+#
+# ranger
+#   - (rulelist)                              -- NOT implemented
+#
+# sirus
+#   - (ruleset ??)                            -- NOT implemented
diff --git a/R/globals.R b/R/globals.R
@@ -1,3 +1,8 @@
+################################################################################
+# This is the part of the 'tidyrules' R package hosted at
+# https://github.com/talegari/tidyrules with GPL-3 license.
+################################################################################
+
 utils::globalVariables(c(".",
                          "LHS",
                          "RHS",
@@ -12,6 +17,10 @@ utils::globalVariables(c(".",
                          "support",
                          "trial_number",
                          "yval",
-                         "confidence"
+                         "confidence",
+                         "rn__",
+                         "row_nbr",
+                         "pref__",
+                         "data"
                          )
                        )
diff --git a/R/package.R b/R/package.R
@@ -9,6 +9,7 @@
 #' @importFrom magrittr %>%
 #' @importFrom rlang %||%
 #' @importFrom data.table :=
+#' @importFrom utils data
 "_PACKAGE"
 
 list.rules.party = getFromNamespace(".list.rules.party", "partykit")
diff --git a/R/rule_translators.R b/R/rule_translators.R
@@ -1,33 +1,33 @@
-#' @name ruleRToPython
-#' @title Convert a R parsable rule to python parsable rule
-#' @description Expected to be passed to `pd.query` method of pandas dataframe
+#' @name convert_rule_flavor
+#' @title Convert a R parsable rule to python/sql parsable rule
+#' @description Convert a R parsable rule to python/sql parsable rule
 #' @param rule (chr vector) R parsable rule(s)
-#' @return (chr vector) Python parsable rule(s)
-ruleRToPython = function(rule){
+#' @param flavor (string) One among: 'python', 'sql'
+#' @return (chr vector) of rules
+#' @export
+convert_rule_flavor = function(rule, flavor){
 
-  res = rule %>%
-    stringr::str_replace_all("%in%", "in") %>%
-    stringr::str_replace_all("c\\(", "[") %>%
-    stringr::str_replace_all("\\)", "]") %>%
-    stringr::str_replace_all("&", "and")
+  checkmate::assert_character(rule)
+  checkmate::assert_string(flavor)
+  flavor = stringr::str_to_lower(flavor)
+  checkmate::assert_choice(flavor, c("python", "sql"))
 
+  if (flavor == "python"){
+    res =
+      rule %>%
+      stringr::str_replace_all("%in%", "in") %>%
+      stringr::str_replace_all("c\\(", "[") %>%
+      stringr::str_replace_all("\\)", "]") %>%
+      stringr::str_replace_all("&", "and")
 
-  return(res)
-}
-
-#' @name ruleRToSQL
-#' @title Convert a R parsable rule to SQL parsable rule
-#' @description Expected to be passed after SQL 'WHERE' clause
-#' @param rule (chr vector) R parsable rule(s)
-#' @return (chr vector) SQL parsable rule(s) as a 'WHERE' clause
-ruleRToSQL = function(rule){
-
-  res = rule %>%
-    stringr::str_replace_all("==", "=") %>%
-    stringr::str_replace_all("%in%", "IN") %>%
-    stringr::str_replace_all("c\\(", "(") %>%
-    stringr::str_replace_all("&", "AND")
-
+  } else if (flavor == "sql"){
+    res =
+      rule %>%
+      stringr::str_replace_all("==", "=") %>%
+      stringr::str_replace_all("%in%", "IN") %>%
+      stringr::str_replace_all("c\\(", "(") %>%
+      stringr::str_replace_all("&", "AND")
+  }
 
   return(res)
 }