summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorLeon Rische <leon.rische@me.com>2020-05-02 21:05:35 +0200
committerLeon Rische <leon.rische@me.com>2020-05-02 21:05:35 +0200
commit4bdd0f0e9ed4ddcb0773e1813eb787a22c49cfc9 (patch)
tree5f2989fad21f4d4e5ee76d810f479cd641f84065
parenta2aa55e2855e415ee52a46e36bdfa2060a01f744 (diff)
Replace AWK scripts with single indexer script
-rw-r--r--awk/files.awk18
-rw-r--r--awk/index.awk183
-rw-r--r--awk/index_cards.awk59
-rw-r--r--awk/index_positions.awk115
-rw-r--r--awk/stats_cards.awk50
-rw-r--r--awk/stats_positions.awk54
-rw-r--r--org-fc.el203
7 files changed, 263 insertions, 419 deletions
diff --git a/awk/files.awk b/awk/files.awk
deleted file mode 100644
index 6926408..0000000
--- a/awk/files.awk
+++ /dev/null
@@ -1,18 +0,0 @@
-BEGIN {
- FS="|";
-}
-
-BEGINFILE {
- has_card = 0;
-}
-
-# Flashcard headings
-/^\*+ .*:fc:.*$/ {
- has_card = 1;
-}
-
-ENDFILE {
- if (has_card == 1) {
- print FILENAME;
- }
-}
diff --git a/awk/index.awk b/awk/index.awk
new file mode 100644
index 0000000..6559f3c
--- /dev/null
+++ b/awk/index.awk
@@ -0,0 +1,183 @@
+BEGIN {
+ # The only time we're interested in multiple fields is when
+ # parsing the review data drawer.
+ FS="|";
+
+ # NOTE(review): `now` is not read by any rule visible in this script;
+ # confirm nothing else depends on it before removing.
+ now = strftime("%FT%TZ", systime(), 1);
+
+ # Tag, drawer and property names used by the rules below.
+ # or_default is defined outside this file; presumably it falls back
+ # to its second argument when the first is unset (TODO confirm).
+ fc_tag = ":" or_default(fc_tag, "fc") ":";
+ suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
+ review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
+ type_property = or_default(type_property, "FC_TYPE");
+ created_property = or_default(created_property, "FC_CREATED");
+
+ # Small state machine to make sure cards are in the correct format
+ state = 0;
+ state_file = 0; # set at the start of each file
+ state_card = 1; # a heading carrying the flashcard tag was seen
+ state_properties = 2; # inside the property drawer of a card
+ state_properties_done = 3; # property drawer was closed
+ state_review_data = 4; # inside the review data drawer
+ state_review_data_done = 5; # review data drawer was closed, card printed
+
+ # Open the top-level list; it is closed in the END rule.
+ print "(";
+}
+
+# Convert an ISO8601 timestamp to an Emacs timestamp
+# (second_upper_16_bit, second_lower_16_bit)
+#
+# time: timestamp string such as "2020-05-02T21:05:35Z", read as UTC.
+# Returns the string "(HIGH LOW)", where HIGH and LOW are the upper
+# and lower 16 bits of the seconds since the epoch.
+#
+# ts, ts_h and ts_l are extra parameters that only serve as local
+# variables, so the function does not overwrite globals of the same
+# name. Callers pass `time` only.
+function parse_time(time,    ts, ts_h, ts_l) {
+    # mktime expects a format of "YYYY MM DD HH MM SS"
+    # and doesn't care about the trailing space left by the "Z"
+    gsub(/[\-T:Z]/, " ", time);
+
+    # NOTE(review): mktime returns -1 for input it cannot parse (e.g. a
+    # missing property); rshift may reject negative values -- confirm
+    # how broken timestamps should be reported.
+    ts = mktime(time, 1);
+    ts_h = rshift(ts, 16);
+    ts_l = and(ts, 0xffff);
+
+    return "(" ts_h " " ts_l ")";
+}
+
+## File Parsing
+
+BEGINFILE {
+    # Data for files is only printed once we have encountered the
+    # first card.
+    printed_file_line = 0;
+    needs_file_closing = 0;
+
+    # Stack of parent headline tags, level 0 is used for filetags.
+    # Clear the whole stack so heading tags collected in the previous
+    # file cannot be inherited by cards of this file.
+    delete parent_tags;
+    parent_tags[0] = "";
+    state = state_file;
+}
+
+# Close the ":cards (" list and the file entry, but only if a file
+# header was printed for this file (needs_file_closing is set together
+# with the header).
+ENDFILE {
+ if (needs_file_closing) {
+ print " ))";
+ needs_file_closing = 0;
+ }
+}
+
+## Filetags
+
+# Store the tags of a "#+FILETAGS:" line at level 0 of the tag stack.
+# `next` skips all remaining rules for this line.
+match($0, /#\+FILETAGS:[ \t]+(.*)/, a) {
+ parent_tags[0] = a[1];
+ next;
+}
+
+## Heading Parsing
+
+# Matches headings ("* Title", "** Title :tag:", ...), records their
+# level and tags and, if the flashcard tag is present, starts a new card.
+#
+# The whole run of stars is captured as a single group. With /^(\*)+/
+# the group only holds the last star, so every heading would be level 1.
+match($0, /^(\*+)[ \t]+.*$/, a) {
+    level = length(a[1]);
+    tags = "";
+
+    # tag re based on org-tag-re
+    # TODO: Do this in a single match
+    if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) {
+        tags = b[1];
+    }
+    parent_tags[level] = tags;
+
+    # Entries deeper than this heading belong to an earlier subtree.
+    # Drop them so they are not inherited when heading levels are skipped.
+    for (stale_level in parent_tags) {
+        if (stale_level + 0 > level) {
+            delete parent_tags[stale_level];
+        }
+    }
+
+    id = "none";
+
+    if (tags ~ fc_tag) {
+        state = state_card;
+        suspended = (tags ~ suspended_tag);
+    }
+    next;
+}
+
+## Drawer Parsing
+
+# Start of a property drawer. It is only parsed when the state machine
+# is in state_card, i.e. after a flashcard heading; properties left
+# over from an earlier card are discarded. The line is always skipped.
+/:PROPERTIES:/ {
+ if (state == state_card) {
+ state = state_properties;
+ delete properties;
+ }
+ next;
+}
+
+# Start of the review data drawer. Rows are collected in review_data,
+# review_index is the next free (1-based) slot.
+$0 ~ review_data_drawer {
+ # Make sure the review data comes after the property drawer
+ if (state == state_properties_done) {
+ delete review_data;
+ review_index = 1;
+ state = state_review_data;
+ }
+ next;
+}
+
+# End of a drawer. Closing the property drawer only advances the
+# state; closing the review data drawer prints the complete card entry
+# (and, for the first card of a file, the file header before it).
+# A card whose drawers are missing or out of order prints nothing.
+#
+# NOTE(review): FILENAME, property values and tags are interpolated
+# into the output without escaping, and the type property is printed
+# unquoted -- confirm these can never contain quotes or be empty.
+/:END:/ {
+ if (state == state_properties) {
+ state = state_properties_done;
+ } else if (state == state_review_data) {
+ state = state_review_data_done;
+ # If this is the first card in a file, print the file "header"
+ if (!printed_file_line) {
+ print " (" \
+ ":path \"" FILENAME "\"" \
+ " :cards (";
+ printed_file_line = 1;
+ needs_file_closing = 1;
+ }
+
+ # Card header
+ # Inherited tags are combined from stack levels 0 (filetags) up to,
+ # but not including, the level of the card's own heading.
+ # combine_tags is defined outside this file.
+ inherited_tags = "";
+ for (i = 0; i < level; i++) {
+ inherited_tags = combine_tags(inherited_tags, parent_tags[i]);
+ }
+ local_tags = parent_tags[level];
+
+ print " (" \
+ ":id \"" properties["ID"] "\"" \
+ " :type " properties[type_property] \
+ " :created " parse_time(properties[created_property]) \
+ " :suspended " (suspended ? "t" : "nil") \
+ " :inherited-tags \"" inherited_tags "\"" \
+ " :local-tags \"" local_tags "\"" \
+ " :positions (";
+
+ # Card positions
+ # One entry per row collected while inside the review data drawer.
+ for (i = 1; i < review_index; i++) {
+ print " (" \
+ ":position \"" review_data[i]["position"] "\"" \
+ " :ease " review_data[i]["ease"] \
+ " :box " review_data[i]["box"] \
+ " :interval " review_data[i]["interval"] \
+ " :due " parse_time(review_data[i]["due"]) \
+ ")"
+ }
+ # Close the ":positions (" list and the card entry.
+ print " ))";
+ }
+ next;
+}
+
+## Property Parsing
+
+# Inside the property drawer of a card, store each ":NAME: value" line
+# in the properties array, keyed by NAME.
+# trim_surrounding is defined outside this file; presumably it strips
+# surrounding whitespace (TODO confirm).
+(state == state_properties) && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
+ properties[a[1]] = trim_surrounding(a[2]);
+ next;
+}
+
+## Review data parsing
+
+# TODO: Explicit match, to check for broken drawers
+#
+# Positions are collected in an array first,
+# in case the review drawer is broken.
+#
+# Rows are split on "|" (FS is set in BEGIN), so a table row with five
+# columns has seven fields: $1 and $7 are the text outside the outer
+# bars, $2..$6 are the columns.
+# trim and trim_surrounding are defined outside this file.
+(state == state_review_data) && /^\|.*\|$/ {
+ # check NF to skip the |--+--| table separator
+ # match on $2 to skip the table header
+ if (NF == 7 && $2 !~ "position") {
+ review_data[review_index]["position"] = trim($2);
+ review_data[review_index]["ease"] = trim($3);
+ review_data[review_index]["box"] = trim($4);
+ review_data[review_index]["interval"] = trim($5);
+ review_data[review_index]["due"] = trim_surrounding($6);
+ review_index += 1;
+ }
+ next;
+}
+
+# Close the top-level list opened in BEGIN.
+END {
+ print ")";
+}
diff --git a/awk/index_cards.awk b/awk/index_cards.awk
deleted file mode 100644
index 0785543..0000000
--- a/awk/index_cards.awk
+++ /dev/null
@@ -1,59 +0,0 @@
-BEGIN {
- FS="|";
-
- fc_tag = ":" or_default(fc_tag, "fc") ":";
- suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
- review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
- type_property = or_default(type_property, "FC_TYPE");
- created_property = or_default(created_property, "FC_CREATED");
-
- print "(";
-}
-
-## Heading Parsing
-
-/^\*+[ \t]+.*$/ {
- # tag re based on org-tag-re
- match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, a)
- tags = a[1]
- id = "none";
-
- if (tags ~ fc_tag) {
- in_card = 1;
- suspended = (tags ~ suspended_tag);
- } else {
- in_card = 0;
- }
- next
-}
-
-## Property parsing
-
-in_card && /:PROPERTIES:/ {
- in_properties = 1;
- delete properties;
-}
-
-in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
- properties[a[1]] = trim_surrounding(a[2]);
-}
-
-in_properties && /:END:/ {
- id = properties["ID"];
- type = properties[type_property];
- created = properties[created_property];
- print " (" \
- ":path \"" FILENAME "\"" \
- " :id \"" id "\"" \
- " :type " type \
- " :suspended " (suspended ? "t" : "nil") \
- " :created \"" created "\"" \
- ")"
-
- in_properties = 0;
- in_card = 0;
-}
-
-END {
- print ")";
-}
diff --git a/awk/index_positions.awk b/awk/index_positions.awk
deleted file mode 100644
index c1c883c..0000000
--- a/awk/index_positions.awk
+++ /dev/null
@@ -1,115 +0,0 @@
-BEGIN {
- FS="|";
- now = strftime("%FT%TZ", systime(), 1);
-
- fc_tag = ":" or_default(fc_tag, "fc") ":";
- suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
- review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
- type_property = or_default(type_property, "FC_TYPE");
- created_property = or_default(created_property, "FC_CREATED");
-
- print "(";
-}
-
-BEGINFILE {
- # Stack of parent headline tags, level 0 is used for filetags
- parent_tags[0] = "";
-}
-
-## Filetags
-
-match($0, /#\+FILETAGS:[ \t]+(.*)/, a) {
- parent_tags[0] = a[1];
-}
-
-## Heading Parsing
-
-match($0, /^(\*)+[ \t]+.*$/, a) {
- level = length(a[1]);
- tags = ""
-
- # tag re based on org-tag-re
- if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) {
- tags = b[1];
- }
- parent_tags[level] = tags;
-
- id = "none";
-
- if (tags ~ fc_tag) {
- in_card = 1;
- suspended = (tags ~ suspended_tag);
- } else {
- in_card = 0;
- }
-
- last_level = level;
- next
-}
-
-## Property parsing
-
-in_card && /:PROPERTIES:/ {
- in_properties = 1;
- delete properties;
-}
-
-in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
- properties[a[1]] = trim_surrounding(a[2]);
-}
-
-in_properties && /:END:/ {
- in_properties = 0;
-}
-
-## Review data parsing
-
-in_card && $0 ~ review_data_drawer {
- in_data = 1;
-}
-
-in_data && /:END:/ {
- in_data = 0;
-}
-
-in_data && /^\|.*\|$/ {
- # Make sure we're inside a data block,
- # check NF to skip the |--+--| table separator
- # match on $2 to skip the table header
- if (in_data == 1 && NF == 7 && $2 !~ "position") {
- id = properties["ID"];
- type = properties[type_property];
-
- position = trim($2);
- ease = trim($3);
- box = trim($4);
- interval = trim($5);
- due = trim_surrounding($6);
-
- inherited_tags = "";
- for (i = 0; i < level; i++) {
- inherited_tags = combine_tags(inherited_tags, parent_tags[i]);
- }
- local_tags = parent_tags[level];
-
- if (!(filter_due == "1") || (due < now && suspended == "0")) {
- print "(" \
- ":path \"" FILENAME "\"" \
- " :id \"" id "\"" \
- " :type " type \
- " :suspended " (suspended ? "t" : "nil") \
- " :position \"" position "\"" \
- " :ease " ease \
- " :box " box \
- " :interval " interval \
- " :due \"" due "\"" \
- " :inherited-tags \"" inherited_tags "\"" \
- " :local-tags \"" local_tags "\"" \
- ")"
- }
- }
-}
-
-END {
- print ")";
-}
diff --git a/awk/stats_cards.awk b/awk/stats_cards.awk
deleted file mode 100644
index 31a0047..0000000
--- a/awk/stats_cards.awk
+++ /dev/null
@@ -1,50 +0,0 @@
-BEGIN {
- FS="\t";
- total = 0;
- n_suspended = 0;
-
- t_day = time_days_ago(1);
- t_week = time_days_ago(7);
- t_month = time_days_ago(30);
-
- created["day"] = 0;
- created["week"] = 0;
- created["month"] = 0;
-}
-
-{
- total += 1;
-
- type = $3;
- by_type[type] += 1;
- suspended = $4 == "1";
-
- if (suspended) {
- n_suspended++;
- } else {
- if ($5 > t_day) {
- created["day"]++;
- }
-
- if ($5 > t_week) {
- created["week"]++;
- }
-
- if ($5 > t_month) {
- created["month"]++;
- }
- }
-}
-
-END {
- print "("
- print " :total " total;
- print " :suspended " n_suspended;
- print " :created-day " created["day"];
- print " :created-week " created["week"];
- print " :created-month " created["month"];
- for (var in by_type) {
- print " :type-" var " " by_type[var];
- }
- print ")"
-}
diff --git a/awk/stats_positions.awk b/awk/stats_positions.awk
deleted file mode 100644
index c276f87..0000000
--- a/awk/stats_positions.awk
+++ /dev/null
@@ -1,54 +0,0 @@
-BEGIN {
- FS="\t";
- total = 0;
- suspended = 0;
- ease = 0;
- interval = 0;
- box = 0;
- due = 0;
- now = strftime("%FT%TZ", systime(), 1);
- n_stats = 0;
-}
-
-{
- total += 1;
-
- type = $3;
- by_type[type] += 1;
-
- # Don't collect ease / box / interval stats for suspended cards
- if ($4 == "1") {
- suspended += 1;
- } else {
- ease += $6;
- box += $7;
- interval += $8;
- n_stats++;
- }
-
-
- if ($4 == "0" && $9 < now) {
- due += 1;
- }
-}
-
-END {
- print "("
- print " :total " total;
- print " :suspended " suspended;
- print " :due " due;
- for (var in by_type) {
- print " :type-" var " " by_type[var];
- }
-
- if (n_stats > 0) {
- print " :avg-ease " ease / n_stats;
- print " :avg-box " box / n_stats;
- print " :avg-interval " interval / n_stats;
- } else {
- print " :avg-ease " 0.0;
- print " :avg-box " 0;
- print " :avg-interval " 0.0;
- }
- print ")"
-}
diff --git a/org-fc.el b/org-fc.el
index ea675e0..9241ec6 100644
--- a/org-fc.el
+++ b/org-fc.el
@@ -1191,121 +1191,44 @@ file (absolute path) as input."
(defun org-fc-awk--xargs (command)
"Generate the shell command for calling COMMAND with xargs."
- (concat "xargs -n 2500 -P 4 -0 " command))
-
-;;;; TSV Parsing
-
-(defun org-fc-tsv--parse-element (header element)
- "Parse an ELEMENT of a row given a single HEADER element."
- (if (listp header)
- (pcase (cdr header)
- ('string element)
- ('date (parse-iso8601-time-string element))
- ('number (string-to-number element))
- ('symbol (intern element))
- ('keyword (intern (concat ":" element)))
- ('bool (string= element "1"))
- ('tags (split-string element ":" t)))
- element))
-
-(defun org-fc-tsv--parse-row (headers elements)
- "Convert two lists of HEADERS and ELEMENTS into a plist.
-Each element is parsed using its header specification."
- (if (null headers)
- '()
- (let ((header (car headers)))
- (cl-assert (not (null elements)))
- `(,(if (listp header) (car header) header)
- ,(org-fc-tsv--parse-element header (car elements))
- .
- ,(org-fc-tsv--parse-row (cdr headers) (cdr elements))))))
-
-(defun org-fc-tsv-parse (headers input)
- "Parse a tsv INPUT into a plist, give a list of HEADERS."
- (mapcar
- (lambda (row) (org-fc-tsv--parse-row headers (split-string row "\t")))
- (split-string input "\n" t)))
+ (concat "xargs -0 " command))
-;;;; AWK Wrapper Functions
-
-(cl-defun org-fc-awk-cards (&optional (paths org-fc-directories))
- "List all cards in PATHS."
- (mapcar
- (lambda (pos)
- (plist-put
- pos
- :created
- (parse-iso8601-time-string (plist-get pos :created))))
- (read
- (shell-command-to-string
- (org-fc-awk--pipe
- (org-fc-awk--find paths)
- (org-fc-awk--xargs
- (org-fc-awk--command
- "awk/index_cards.awk"
- :utils t
- :variables (org-fc-awk--indexer-variables))))))))
-
-(cl-defun org-fc-awk-stats-cards (&optional (paths org-fc-directories))
- "Statistics for all cards in PATHS."
+(defun org-fc-awk-index (paths)
+ "Generate a list of all files, cards & positions in PATHS.
+The result is whatever `read' parses from the output of the
+awk/index.awk indexer run over the files found in PATHS."
 (read
 (shell-command-to-string
 (org-fc-awk--pipe
 (org-fc-awk--find paths)
 (org-fc-awk--xargs
 (org-fc-awk--command
- "awk/index_cards_tsv.awk"
+ "awk/index.awk"
 :utils t
- :variables (org-fc-awk--indexer-variables)))
- (org-fc-awk--command "awk/stats_cards.awk" :utils t)))))
+ :variables (org-fc-awk--indexer-variables)))))))
-(defun org-fc-awk-due-positions-for-paths (paths)
- "Generate a list of due positions in PATHS."
- (org-fc-awk-positions-for-paths paths t))
+;;;; AWK Wrapper Functions
(defun org-fc-awk-positions-for-paths (paths &optional filter-due)
- "Generate a list of all positions in PATHS.
+ "Generate a list of non-suspended positions in PATHS.
If FILTER-DUE is non-nil, only list non-suspended cards that are
due for review."
- (mapcar
- (lambda (pos)
- (plist-put
- (plist-put
- (plist-put
- (plist-put
- pos
- :tags
- (org-fc-combine-tags
- (split-string (plist-get pos :inherited-tags) ":" t)
- (split-string (plist-get pos :local-tags) ":" t)))
- :due (parse-iso8601-time-string (plist-get pos :due)))
- :inherited-tags (split-string (plist-get pos :inherited-tags) ":" t))
- :local-tags (split-string (plist-get pos :local-tags) ":" t)))
- (read
- (shell-command-to-string
- (org-fc-awk--pipe
- (org-fc-awk--find paths)
- (org-fc-awk--xargs
- (org-fc-awk--command
- "awk/index_positions.awk"
- :utils t
- :variables
- (cons
- `("filter_due" . ,(if filter-due "1" "0"))
- (org-fc-awk--indexer-variables)))))))))
-
-(cl-defun org-fc-awk-stats-positions (&optional (paths org-fc-directories))
- "Statistics for all positions in PATHS."
- (read
- (shell-command-to-string
- (org-fc-awk--pipe
- (org-fc-awk--find paths)
- (org-fc-awk--xargs
- (org-fc-awk--command
- "awk/index_positions_tsv.awk"
- :utils t
- :variables (org-fc-awk--indexer-variables)))
- (org-fc-awk--command "awk/stats_positions.awk")))))
+ (let (res (now (current-time)))
+ (dolist (file (org-fc-awk-index paths))
+ (dolist (card (plist-get file :cards))
+ (unless (plist-get card :suspended)
+ (dolist (pos (plist-get card :positions))
+ (if (or (not filter-due)
+ (time-less-p (plist-get pos :due) now))
+ (push
+ (list
+ :path (plist-get file :path)
+ :id (plist-get card :id)
+ :type (plist-get card :type)
+ :due (plist-get pos :due)
+ :position (plist-get pos :position))
+ res))))))
+ res))
(defun org-fc-awk-stats-reviews ()
"Statistics for all card reviews.
@@ -1724,12 +1647,53 @@ rating the card."
(defun org-fc-review-estimate (paths n)
"Positions due in PATHS in the next N days."
- (let ((now (+ (time-to-seconds (current-time))
- (* 60 60 24 n))))
+ (let ((now (time-add (current-time) (* 60 60 24 n))))
(seq-count
- (lambda (pos) (< (time-to-seconds (plist-get pos :due)) now))
+ (lambda (pos) (time-less-p (plist-get pos :due) now))
(org-fc-awk-positions-for-paths paths))))
+(defun org-fc-stats (index)
+  "Compute statistics for an INDEX of cards and positions.
+INDEX is a list of file plists; each has a :cards list whose
+entries provide :type, :created, :suspended and :positions.
+
+Return a plist with the keys :total, :suspended, :due, :by-type,
+:created-day, :created-week, :created-month, :avg-ease, :avg-box
+and :avg-interval.  Suspended cards only count towards :total,
+:suspended and :by-type.  The averages are 0.0 if there are no
+positions to average over."
+  (let* ((total 0) (suspended 0)
+         (by-type (make-hash-table))
+         (sum-ease 0.0) (sum-box 0.0) (sum-interval 0.0)
+         (n-pos 0) (n-due 0)
+         (created-day 0) (created-week 0) (created-month 0)
+         (now (current-time))
+         (time-day (time-subtract now (* 24 60 60)))
+         (time-week (time-subtract now (* 7 24 60 60)))
+         (time-month (time-subtract now (* 30 24 60 60))))
+    (dolist (file index)
+      (dolist (card (plist-get file :cards))
+        (cl-incf total)
+        (if (plist-get card :suspended)
+            (cl-incf suspended)
+          (let ((created (plist-get card :created)))
+            (when (time-less-p time-day created)
+              (cl-incf created-day))
+            (when (time-less-p time-week created)
+              (cl-incf created-week))
+            (when (time-less-p time-month created)
+              (cl-incf created-month))
+            (dolist (pos (plist-get card :positions))
+              (cl-incf n-pos)
+              (when (time-less-p (plist-get pos :due) now)
+                (cl-incf n-due))
+              (cl-incf sum-ease (plist-get pos :ease))
+              (cl-incf sum-box (plist-get pos :box))
+              (cl-incf sum-interval (plist-get pos :interval)))))
+        (cl-incf (gethash (plist-get card :type) by-type 0))))
+    ;; With no positions the sums are still 0.0; dividing them by 0
+    ;; would yield NaN, so divide by 1 to report 0.0 instead.
+    (let ((divisor (max n-pos 1)))
+      (list :total total
+            :suspended suspended
+            :due n-due
+            :by-type (org-fc-hashtable-to-alist by-type)
+            :created-day created-day
+            :created-week created-week
+            :created-month created-month
+            :avg-ease (/ sum-ease divisor)
+            :avg-box (/ sum-box divisor)
+            :avg-interval (/ sum-interval divisor)))))
+
;;;; Bar Chart Generation
(defun org-fc-dashboard-bar-chart (stat)
@@ -1766,8 +1730,8 @@ rating the card."
(interactive)
(let* ((buf (get-buffer-create org-fc-dashboard-buffer-name))
(inhibit-read-only t)
- (cards-stats (org-fc-awk-stats-cards))
- (positions-stats (org-fc-awk-stats-positions))
+ (index (org-fc-awk-index org-fc-directories))
+ (stats (org-fc-stats index))
(reviews-stats (org-fc-awk-stats-reviews)))
(with-current-buffer buf
(erase-buffer)
@@ -1778,36 +1742,29 @@ rating the card."
(propertize " Card Statistics\n\n" 'face 'org-level-1))
(insert (format " New: %d (day) %d (week) %d (month) \n"
- (plist-get cards-stats :created-day)
- (plist-get cards-stats :created-week)
- (plist-get cards-stats :created-month)))
+ (plist-get stats :created-day)
+ (plist-get stats :created-week)
+ (plist-get stats :created-month)))
(insert "\n")
(insert (format
" %6d Cards, %d suspended\n"
- (plist-get cards-stats :total)
- (plist-get cards-stats :suspended)))
- (dolist (position '((:type-normal . "Normal")
- (:type-double . "Double")
- (:type-text-input . "Text Input")
- (:type-cloze . "Cloze")))
- (insert
- (format " %6d %s\n"
- (or (plist-get cards-stats (car position)) 0)
- (cdr position))))
-
+ (plist-get stats :total)
+ (plist-get stats :suspended)))
+ (dolist (pair (plist-get stats :by-type))
+ (insert (format " %6d %s\n" (cdr pair) (car pair))))
(insert "\n")
(insert
(propertize " Position Statistics\n\n" 'face 'org-level-1))
- (insert (format " %6d Due Now\n\n" (plist-get positions-stats :due)))
+ (insert (format " %6d Due Now\n\n" (plist-get stats :due)))
(dolist (position '((:avg-ease . "Avg. Ease")
(:avg-box . "Avg. Box")
(:avg-interval . "Avg. Interval (days)")))
(insert
(format " %6.2f %s\n"
- (plist-get positions-stats (car position))
+ (plist-get stats (car position))
(cdr position))))
(insert "\n")