summary refs log tree commit diff
path: root/awk
diff options
context:
space:
mode:
author Leon Rische <leon.rische@me.com> 2020-05-02 21:05:35 +0200
committer Leon Rische <leon.rische@me.com> 2020-05-02 21:05:35 +0200
commit 4bdd0f0e9ed4ddcb0773e1813eb787a22c49cfc9 (patch)
tree 5f2989fad21f4d4e5ee76d810f479cd641f84065 /awk
parent a2aa55e2855e415ee52a46e36bdfa2060a01f744 (diff)
Replace AWK scripts with single indexer script
Diffstat (limited to 'awk')
-rw-r--r-- awk/files.awk 18
-rw-r--r-- awk/index.awk 183
-rw-r--r-- awk/index_cards.awk 59
-rw-r--r-- awk/index_positions.awk 115
-rw-r--r-- awk/stats_cards.awk 50
-rw-r--r-- awk/stats_positions.awk 54
6 files changed, 183 insertions, 296 deletions
diff --git a/awk/files.awk b/awk/files.awk
deleted file mode 100644
index 6926408..0000000
--- a/awk/files.awk
+++ /dev/null
@@ -1,18 +0,0 @@
-BEGIN {
- FS="|";
-}
-
-BEGINFILE {
- has_card = 0;
-}
-
-# Flashcard headings
-/^\*+ .*:fc:.*$/ {
- has_card = 1;
-}
-
-ENDFILE {
- if (has_card == 1) {
- print FILENAME;
- }
-}
diff --git a/awk/index.awk b/awk/index.awk
new file mode 100644
index 0000000..6559f3c
--- /dev/null
+++ b/awk/index.awk
@@ -0,0 +1,183 @@
+BEGIN {
+    # Review data rows are "|"-separated table lines; that drawer is the
+    # only place where more than one field is needed.
+    FS="|";
+
+    now = strftime("%FT%TZ", systime(), 1);
+
+    # Names of the tags, drawer and properties the indexer looks for.
+    # or_default is defined elsewhere; presumably it keeps a value that
+    # was preset by the caller and otherwise uses the given default.
+    fc_tag = ":" or_default(fc_tag, "fc") ":";
+    suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
+    review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
+    type_property = or_default(type_property, "FC_TYPE");
+    created_property = or_default(created_property, "FC_CREATED");
+
+    # States of the small state machine that checks cards are laid out
+    # in the expected order (heading, property drawer, review drawer).
+    state_file = 0;
+    state_card = 1;
+    state_properties = 2;
+    state_properties_done = 3;
+    state_review_data = 4;
+    state_review_data_done = 5;
+    state = state_file;
+
+    # Open the top-level list; the END rule closes it.
+    print "(";
+}
+
+# Convert an ISO8601 UTC timestamp ("YYYY-MM-DDTHH:MM:SSZ") to an Emacs
+# timestamp "(HIGH LOW)", where HIGH is the epoch seconds shifted right
+# by 16 bits and LOW is the lower 16 bits.
+#
+# mktime() returns -1 for a missing or malformed timestamp, and gawk's
+# bitwise functions abort on negative operands. Such input is mapped to
+# the epoch "(0 0)" so one broken card cannot kill the whole run.
+#
+# The parameters after the extra spaces are locals; without them
+# ts / ts_h / ts_l would be globals.
+function parse_time(time,    ts, ts_h, ts_l) {
+    # mktime expects a format of "YYYY MM DD HH MM SS"
+    # and doesn't care about the trailing space left by the "Z"
+    gsub(/[\-T:Z]/, " ", time);
+
+    ts = mktime(time, 1);
+    if (ts < 0) {
+        return "(0 0)";
+    }
+
+    ts_h = rshift(ts, 16);
+    ts_l = and(ts, 0xffff);
+
+    return "(" ts_h " " ts_l ")";
+}
+
+## File Parsing
+
+BEGINFILE {
+    # Data for files is only printed once we have encountered the
+    # first card.
+    printed_file_line = 0;
+    needs_file_closing = 0;
+
+    # Stack of parent headline tags, indexed by heading level; level 0
+    # is used for filetags. Every level is dropped here so that tags
+    # from the previous file cannot be inherited by cards in this one.
+    delete parent_tags;
+    parent_tags[0] = "";
+    state = state_file;
+}
+
+ENDFILE {
+    # Close the ":cards (" list and the file entry, but only if a card
+    # in this file caused the file header to be printed.
+    if (needs_file_closing) {
+        print " ))";
+        needs_file_closing = 0;
+    }
+}
+
+## Filetags
+
+# Tags from a "#+FILETAGS:" line are stored as level 0 of the tag stack.
+match($0, /#\+FILETAGS:[ \t]+(.*)/, a) {
+    parent_tags[0] = a[1];
+    next;
+}
+
+## Heading Parsing
+
+# The capture group has to span the whole run of stars. With "(\*)+" only
+# the last star is captured, so every heading was treated as level 1.
+match($0, /^(\*+)[ \t]+.*$/, a) {
+    level = length(a[1]);
+    tags = "";
+
+    # tag re based on org-tag-re
+    # TODO: Do this in a single match
+    if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) {
+        tags = b[1];
+    }
+
+    # Entries deeper than this heading belong to an earlier subtree.
+    # Drop them so a later heading that skips levels does not inherit
+    # tags from an unrelated branch.
+    for (stale_level in parent_tags) {
+        if (stale_level + 0 > level) {
+            delete parent_tags[stale_level];
+        }
+    }
+    parent_tags[level] = tags;
+
+    id = "none";
+
+    # Only headings carrying the flashcard tag start a card. Other
+    # headings leave the state untouched.
+    if (tags ~ fc_tag) {
+        state = state_card;
+        suspended = (tags ~ suspended_tag);
+    }
+    next;
+}
+
+## Drawer Parsing
+
+# Start of a property drawer. It only counts when it directly follows a
+# card heading; in any other state the line is consumed and ignored.
+/:PROPERTIES:/ {
+    if (state != state_card) {
+        next;
+    }
+
+    state = state_properties;
+    delete properties;
+    next;
+}
+
+# Start of the review data drawer. It is accepted only after the card's
+# property drawer has been closed; otherwise the line is skipped.
+$0 ~ review_data_drawer {
+    if (state != state_properties_done) {
+        next;
+    }
+
+    # Fresh position list for this card; indices start at 1.
+    delete review_data;
+    review_index = 1;
+    state = state_review_data;
+    next;
+}
+
+# End of a drawer. Closing the property drawer only advances the state
+# machine. Closing the review data drawer emits the complete card: the
+# file header (once per file), the card header and all collected positions.
+# In any other state the line is consumed without effect.
+/:END:/ {
+ if (state == state_properties) {
+ state = state_properties_done;
+ } else if (state == state_review_data) {
+ state = state_review_data_done;
+ # If this is the first card in a file, print the file "header"
+ # NOTE(review): FILENAME is written into the string unescaped; a path
+ # containing a double quote or backslash would presumably break the
+ # reader of this output - confirm.
+ if (!printed_file_line) {
+ print " (" \
+ ":path \"" FILENAME "\"" \
+ " :cards (";
+ printed_file_line = 1;
+ needs_file_closing = 1;
+ }
+
+ # Card header
+ # Inherited tags are the combination (via combine_tags, defined
+ # elsewhere) of the tag stack entries below the card's own level,
+ # starting with the filetags at level 0.
+ inherited_tags = "";
+ for (i = 0; i < level; i++) {
+ inherited_tags = combine_tags(inherited_tags, parent_tags[i]);
+ }
+ local_tags = parent_tags[level];
+
+ # The type is printed bare (unquoted), the timestamp is converted by
+ # parse_time, everything else is printed as a quoted string.
+ print " (" \
+ ":id \"" properties["ID"] "\"" \
+ " :type " properties[type_property] \
+ " :created " parse_time(properties[created_property]) \
+ " :suspended " (suspended ? "t" : "nil") \
+ " :inherited-tags \"" inherited_tags "\"" \
+ " :local-tags \"" local_tags "\"" \
+ " :positions (";
+
+ # Card positions
+ # review_index is one past the last stored row, hence the strict "<".
+ for (i = 1; i < review_index; i++) {
+ print " (" \
+ ":position \"" review_data[i]["position"] "\"" \
+ " :ease " review_data[i]["ease"] \
+ " :box " review_data[i]["box"] \
+ " :interval " review_data[i]["interval"] \
+ " :due " parse_time(review_data[i]["due"]) \
+ ")"
+ }
+ # Close the ":positions (" list and the card entry.
+ print " ))";
+ }
+ next;
+}
+
+## Property Parsing
+
+# Inside a property drawer, store each ":NAME: value" line in the
+# properties array under NAME. The value is passed through
+# trim_surrounding (defined elsewhere) before it is stored.
+(state == state_properties) && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
+ properties[a[1]] = trim_surrounding(a[2]);
+ next;
+}
+
+## Review data parsing
+
+# TODO: Explicit match, to check for broken drawers
+#
+# Positions are collected in an array first,
+# in case the review drawer is broken.
+# One table line inside the review data drawer. With FS="|" a data row
+# has 7 fields (the first and last are the text outside the outer bars).
+(state == state_review_data) && /^\|.*\|$/ {
+    # The |--+--| separator has a different field count and the header
+    # row has "position" in its second field; neither is a data row.
+    if (NF != 7 || $2 ~ "position") {
+        next;
+    }
+
+    review_data[review_index]["position"] = trim($2);
+    review_data[review_index]["ease"] = trim($3);
+    review_data[review_index]["box"] = trim($4);
+    review_data[review_index]["interval"] = trim($5);
+    review_data[review_index]["due"] = trim_surrounding($6);
+    review_index++;
+    next;
+}
+
+# Close the top-level list opened by the BEGIN rule.
+END {
+ print ")";
+}
diff --git a/awk/index_cards.awk b/awk/index_cards.awk
deleted file mode 100644
index 0785543..0000000
--- a/awk/index_cards.awk
+++ /dev/null
@@ -1,59 +0,0 @@
-BEGIN {
- FS="|";
-
- fc_tag = ":" or_default(fc_tag, "fc") ":";
- suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
- review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
- type_property = or_default(type_property, "FC_TYPE");
- created_property = or_default(created_property, "FC_CREATED");
-
- print "(";
-}
-
-## Heading Parsing
-
-/^\*+[ \t]+.*$/ {
- # tag re based on org-tag-re
- match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, a)
- tags = a[1]
- id = "none";
-
- if (tags ~ fc_tag) {
- in_card = 1;
- suspended = (tags ~ suspended_tag);
- } else {
- in_card = 0;
- }
- next
-}
-
-## Property parsing
-
-in_card && /:PROPERTIES:/ {
- in_properties = 1;
- delete properties;
-}
-
-in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
- properties[a[1]] = trim_surrounding(a[2]);
-}
-
-in_properties && /:END:/ {
- id = properties["ID"];
- type = properties[type_property];
- created = properties[created_property];
- print " (" \
- ":path \"" FILENAME "\"" \
- " :id \"" id "\"" \
- " :type " type \
- " :suspended " (suspended ? "t" : "nil") \
- " :created \"" created "\"" \
- ")"
-
- in_properties = 0;
- in_card = 0;
-}
-
-END {
- print ")";
-}
diff --git a/awk/index_positions.awk b/awk/index_positions.awk
deleted file mode 100644
index c1c883c..0000000
--- a/awk/index_positions.awk
+++ /dev/null
@@ -1,115 +0,0 @@
-BEGIN {
- FS="|";
- now = strftime("%FT%TZ", systime(), 1);
-
- fc_tag = ":" or_default(fc_tag, "fc") ":";
- suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
- review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
- type_property = or_default(type_property, "FC_TYPE");
- created_property = or_default(created_property, "FC_CREATED");
-
- print "(";
-}
-
-BEGINFILE {
- # Stack of parent headline tags, level 0 is used for filetags
- parent_tags[0] = "";
-}
-
-## Filetags
-
-match($0, /#\+FILETAGS:[ \t]+(.*)/, a) {
- parent_tags[0] = a[1];
-}
-
-## Heading Parsing
-
-match($0, /^(\*)+[ \t]+.*$/, a) {
- level = length(a[1]);
- tags = ""
-
- # tag re based on org-tag-re
- if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) {
- tags = b[1];
- }
- parent_tags[level] = tags;
-
- id = "none";
-
- if (tags ~ fc_tag) {
- in_card = 1;
- suspended = (tags ~ suspended_tag);
- } else {
- in_card = 0;
- }
-
- last_level = level;
- next
-}
-
-## Property parsing
-
-in_card && /:PROPERTIES:/ {
- in_properties = 1;
- delete properties;
-}
-
-in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
- properties[a[1]] = trim_surrounding(a[2]);
-}
-
-in_properties && /:END:/ {
- in_properties = 0;
-}
-
-## Review data parsing
-
-in_card && $0 ~ review_data_drawer {
- in_data = 1;
-}
-
-in_data && /:END:/ {
- in_data = 0;
-}
-
-in_data && /^\|.*\|$/ {
- # Make sure we're inside a data block,
- # check NF to skip the |--+--| table separator
- # match on $2 to skip the table header
- if (in_data == 1 && NF == 7 && $2 !~ "position") {
- id = properties["ID"];
- type = properties[type_property];
-
- position = trim($2);
- ease = trim($3);
- box = trim($4);
- interval = trim($5);
- due = trim_surrounding($6);
-
- inherited_tags = "";
- for (i = 0; i < level; i++) {
- inherited_tags = combine_tags(inherited_tags, parent_tags[i]);
- }
- local_tags = parent_tags[level];
-
- if (!(filter_due == "1") || (due < now && suspended == "0")) {
- print "(" \
- ":path \"" FILENAME "\"" \
- " :id \"" id "\"" \
- " :type " type \
- " :suspended " (suspended ? "t" : "nil") \
- " :position \"" position "\"" \
- " :ease " ease \
- " :box " box \
- " :interval " interval \
- " :due \"" due "\"" \
- " :inherited-tags \"" inherited_tags "\"" \
- " :local-tags \"" local_tags "\"" \
- ")"
- }
- }
-}
-
-END {
- print ")";
-}
diff --git a/awk/stats_cards.awk b/awk/stats_cards.awk
deleted file mode 100644
index 31a0047..0000000
--- a/awk/stats_cards.awk
+++ /dev/null
@@ -1,50 +0,0 @@
-BEGIN {
- FS="\t";
- total = 0;
- n_suspended = 0;
-
- t_day = time_days_ago(1);
- t_week = time_days_ago(7);
- t_month = time_days_ago(30);
-
- created["day"] = 0;
- created["week"] = 0;
- created["month"] = 0;
-}
-
-{
- total += 1;
-
- type = $3;
- by_type[type] += 1;
- suspended = $4 == "1";
-
- if (suspended) {
- n_suspended++;
- } else {
- if ($5 > t_day) {
- created["day"]++;
- }
-
- if ($5 > t_week) {
- created["week"]++;
- }
-
- if ($5 > t_month) {
- created["month"]++;
- }
- }
-}
-
-END {
- print "("
- print " :total " total;
- print " :suspended " n_suspended;
- print " :created-day " created["day"];
- print " :created-week " created["week"];
- print " :created-month " created["month"];
- for (var in by_type) {
- print " :type-" var " " by_type[var];
- }
- print ")"
-}
diff --git a/awk/stats_positions.awk b/awk/stats_positions.awk
deleted file mode 100644
index c276f87..0000000
--- a/awk/stats_positions.awk
+++ /dev/null
@@ -1,54 +0,0 @@
-BEGIN {
- FS="\t";
- total = 0;
- suspended = 0;
- ease = 0;
- interval = 0;
- box = 0;
- due = 0;
- now = strftime("%FT%TZ", systime(), 1);
- n_stats = 0;
-}
-
-{
- total += 1;
-
- type = $3;
- by_type[type] += 1;
-
- # Don't collect ease / box / interval stats for suspended cards
- if ($4 == "1") {
- suspended += 1;
- } else {
- ease += $6;
- box += $7;
- interval += $8;
- n_stats++;
- }
-
-
- if ($4 == "0" && $9 < now) {
- due += 1;
- }
-}
-
-END {
- print "("
- print " :total " total;
- print " :suspended " suspended;
- print " :due " due;
- for (var in by_type) {
- print " :type-" var " " by_type[var];
- }
-
- if (n_stats > 0) {
- print " :avg-ease " ease / n_stats;
- print " :avg-box " box / n_stats;
- print " :avg-interval " interval / n_stats;
- } else {
- print " :avg-ease " 0.0;
- print " :avg-box " 0;
- print " :avg-interval " 0.0;
- }
- print ")"
-}