From 4bdd0f0e9ed4ddcb0773e1813eb787a22c49cfc9 Mon Sep 17 00:00:00 2001 From: Leon Rische Date: Sat, 2 May 2020 21:05:35 +0200 Subject: Replace AWK scripts with single indexer script --- awk/files.awk | 18 ----- awk/index.awk | 183 ++++++++++++++++++++++++++++++++++++++++++++++++ awk/index_cards.awk | 59 ---------------- awk/index_positions.awk | 115 ------------------------------ awk/stats_cards.awk | 50 ------------- awk/stats_positions.awk | 54 -------------- 6 files changed, 183 insertions(+), 296 deletions(-) delete mode 100644 awk/files.awk create mode 100644 awk/index.awk delete mode 100644 awk/index_cards.awk delete mode 100644 awk/index_positions.awk delete mode 100644 awk/stats_cards.awk delete mode 100644 awk/stats_positions.awk (limited to 'awk') diff --git a/awk/files.awk b/awk/files.awk deleted file mode 100644 index 6926408..0000000 --- a/awk/files.awk +++ /dev/null @@ -1,18 +0,0 @@ -BEGIN { - FS="|"; -} - -BEGINFILE { - has_card = 0; -} - -# Flashcard headings -/^\*+ .*:fc:.*$/ { - has_card = 1; -} - -ENDFILE { - if (has_card == 1) { - print FILENAME; - } -} diff --git a/awk/index.awk b/awk/index.awk new file mode 100644 index 0000000..6559f3c --- /dev/null +++ b/awk/index.awk @@ -0,0 +1,183 @@ +BEGIN { + # The only time we're interested in multiple fields is when + # parsing the review data drawer. 
+    FS="|";
+
+    now = strftime("%FT%TZ", systime(), 1);
+
+    fc_tag = ":" or_default(fc_tag, "fc") ":";
+    suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
+    review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
+    type_property = or_default(type_property, "FC_TYPE");
+    created_property = or_default(created_property, "FC_CREATED");
+
+    # Small state machine to make sure cards are in the correct format
+    state = 0;
+    state_file = 0;
+    state_card = 1;
+    state_properties = 2;
+    state_properties_done = 3;
+    state_review_data = 4;
+    state_review_data_done = 5;
+
+    print "(";
+}
+
+# Convert an ISO8601 timestamp to an Emacs timestamp "(high low)":
+# seconds >> 16, then the low 16 bits. ts, ts_h, ts_l are function-local.
+function parse_time(time,    ts, ts_h, ts_l) {
+    # mktime expects a format of "YYYY MM DD HH MM SS"
+    # and doesn't care about the trailing space left by the "Z"
+    gsub(/[\-T:Z]/, " ", time);
+
+    ts = mktime(time, 1);
+    ts_h = rshift(ts, 16);
+    ts_l = and(ts, 0xffff);
+
+    return "(" ts_h " " ts_l ")";
+}
+
+## File Parsing
+
+BEGINFILE {
+    # Data for files is only printed once we have encountered the
+    # first card.
+    printed_file_line = 0;
+    needs_file_closing = 0;
+
+    # Stack of parent headline tags, level 0 is used for filetags
+    parent_tags[0] = "";
+    state = state_file;
+}
+
+ENDFILE {
+    if (needs_file_closing) {
+        print " ))";
+        needs_file_closing = 0;
+    }
+}
+
+## Filetags
+
+match($0, /#\+FILETAGS:[ \t]+(.*)/, a) {
+    parent_tags[0] = a[1];
+    next;
+}
+
+## Heading Parsing
+
+match($0, /^(\*+)[ \t]+.*$/, a) {
+    level = length(a[1]);  # a[1] is the whole run of leading stars, so this is the outline depth
+    tags = "";
+
+    # tag re based on org-tag-re
+    # TODO: Do this in a single match
+    if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) {
+        tags = b[1];
+    }
+    parent_tags[level] = tags;  # kept so deeper headings can inherit these tags
+
+    id = "none";
+
+    if (tags ~ fc_tag) {
+        state = state_card;
+        suspended = (tags ~ suspended_tag);
+    }
+    next;
+}
+
+## Drawer Parsing
+
+/:PROPERTIES:/ {
+    if (state == state_card) {
+        state = state_properties;
+        delete properties;
+    }
+    next;
+}
+
+$0 ~ review_data_drawer {
+    # Make sure the review data comes after the property drawer
+    if (state == state_properties_done) {
+        delete review_data;
+        review_index = 1;  # next free slot in review_data
+        state = state_review_data;
+    }
+    next;
+}
+
+/:END:/ {
+    if (state == state_properties) {
+        state = state_properties_done;
+    } else if (state == state_review_data) {
+        state = state_review_data_done;
+        # If this is the first card in a file, print the file "header"
+        if (!printed_file_line) {
+            print " (" \
+                ":path \"" FILENAME "\"" \
+                " :cards (";
+            printed_file_line = 1;
+            needs_file_closing = 1;
+        }
+
+        # Card header: tags of all ancestor levels (0 = filetags) are inherited
+        inherited_tags = "";
+        for (i = 0; i < level; i++) {
+            inherited_tags = combine_tags(inherited_tags, parent_tags[i]);
+        }
+        local_tags = parent_tags[level];
+
+        print " (" \
+            ":id \"" properties["ID"] "\"" \
+            " :type " properties[type_property] \
+            " :created " parse_time(properties[created_property]) \
+            " :suspended " (suspended ?
"t" : "nil") \
+            " :inherited-tags \"" inherited_tags "\"" \
+            " :local-tags \"" local_tags "\"" \
+            " :positions (";
+
+        # Card positions, one list per row collected from the review drawer
+        for (i = 1; i < review_index; i++) {
+            print " (" \
+                ":position \"" review_data[i]["position"] "\"" \
+                " :ease " review_data[i]["ease"] \
+                " :box " review_data[i]["box"] \
+                " :interval " review_data[i]["interval"] \
+                " :due " parse_time(review_data[i]["due"]) \
+                ")";
+        }
+        print " ))";
+    }
+    next;
+}
+
+## Property Parsing
+
+(state == state_properties) && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) {
+    properties[a[1]] = trim_surrounding(a[2]);
+    next;
+}
+
+## Review data parsing
+
+# TODO: Explicit match, to check for broken drawers
+# Rows may be indented, just like the property lines matched above.
+# Positions are collected in an array first,
+# in case the review drawer is broken.
+(state == state_review_data) && /^[ \t]*\|.*\|[ \t]*$/ {
+    # check NF to skip the |--+--| table separator
+    # match on $2 to skip the table header
+    if (NF == 7 && $2 !~ "position") {
+        review_data[review_index]["position"] = trim($2);
+        review_data[review_index]["ease"] = trim($3);
+        review_data[review_index]["box"] = trim($4);
+        review_data[review_index]["interval"] = trim($5);
+        review_data[review_index]["due"] = trim_surrounding($6);
+        review_index += 1;
+    }
+    next;
+}
+
+END {
+    print ")";
+}
diff --git a/awk/index_cards.awk b/awk/index_cards.awk
deleted file mode 100644
index 0785543..0000000
--- a/awk/index_cards.awk
+++ /dev/null
@@ -1,59 +0,0 @@
-BEGIN {
-    FS="|";
-
-    fc_tag = ":" or_default(fc_tag, "fc") ":";
-    suspended_tag = ":" or_default(suspended_tag, "suspended") ":";
-    review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":";
-    type_property = or_default(type_property, "FC_TYPE");
-    created_property = or_default(created_property, "FC_CREATED");
-
-    print "(";
-}
-
-## Heading Parsing
-
-/^\*+[ \t]+.*$/ {
-    # tag re based on org-tag-re
-    match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, a)
-    tags = a[1]
-    id = "none";
-
-    if (tags ~ fc_tag) {
-        in_card = 1;
-        
suspended = (tags ~ suspended_tag); - } else { - in_card = 0; - } - next -} - -## Property parsing - -in_card && /:PROPERTIES:/ { - in_properties = 1; - delete properties; -} - -in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) { - properties[a[1]] = trim_surrounding(a[2]); -} - -in_properties && /:END:/ { - id = properties["ID"]; - type = properties[type_property]; - created = properties[created_property]; - print " (" \ - ":path \"" FILENAME "\"" \ - " :id \"" id "\"" \ - " :type " type \ - " :suspended " (suspended ? "t" : "nil") \ - " :created \"" created "\"" \ - ")" - - in_properties = 0; - in_card = 0; -} - -END { - print ")"; -} diff --git a/awk/index_positions.awk b/awk/index_positions.awk deleted file mode 100644 index c1c883c..0000000 --- a/awk/index_positions.awk +++ /dev/null @@ -1,115 +0,0 @@ -BEGIN { - FS="|"; - now = strftime("%FT%TZ", systime(), 1); - - fc_tag = ":" or_default(fc_tag, "fc") ":"; - suspended_tag = ":" or_default(suspended_tag, "suspended") ":"; - review_data_drawer = ":" or_default(review_data_drawer, "REVIEW_DATA") ":"; - type_property = or_default(type_property, "FC_TYPE"); - created_property = or_default(created_property, "FC_CREATED"); - - print "("; -} - -BEGINFILE { - # Stack of parent headline tags, level 0 is used for filetags - parent_tags[0] = ""; -} - -## Filetags - -match($0, /#\+FILETAGS:[ \t]+(.*)/, a) { - parent_tags[0] = a[1]; -} - -## Heading Parsing - -match($0, /^(\*)+[ \t]+.*$/, a) { - level = length(a[1]); - tags = "" - - # tag re based on org-tag-re - if (match($0, /^\*+[ \t]+.*[ \t]+(:([a-zA-Z0-9_@#%]+:)+)$/, b) != 0) { - tags = b[1]; - } - parent_tags[level] = tags; - - id = "none"; - - if (tags ~ fc_tag) { - in_card = 1; - suspended = (tags ~ suspended_tag); - } else { - in_card = 0; - } - - last_level = level; - next -} - -## Property parsing - -in_card && /:PROPERTIES:/ { - in_properties = 1; - delete properties; -} - -in_properties && match($0, /^[ \t]*:([a-zA-Z0-9_]+):[ \t]*(.+)$/, a) 
{ - properties[a[1]] = trim_surrounding(a[2]); -} - -in_properties && /:END:/ { - in_properties = 0; -} - -## Review data parsing - -in_card && $0 ~ review_data_drawer { - in_data = 1; -} - -in_data && /:END:/ { - in_data = 0; -} - -in_data && /^\|.*\|$/ { - # Make sure we're inside a data block, - # check NF to skip the |--+--| table separator - # match on $2 to skip the table header - if (in_data == 1 && NF == 7 && $2 !~ "position") { - id = properties["ID"]; - type = properties[type_property]; - - position = trim($2); - ease = trim($3); - box = trim($4); - interval = trim($5); - due = trim_surrounding($6); - - inherited_tags = ""; - for (i = 0; i < level; i++) { - inherited_tags = combine_tags(inherited_tags, parent_tags[i]); - } - local_tags = parent_tags[level]; - - if (!(filter_due == "1") || (due < now && suspended == "0")) { - print "(" \ - ":path \"" FILENAME "\"" \ - " :id \"" id "\"" \ - " :type " type \ - " :suspended " (suspended ? "t" : "nil") \ - " :position \"" position "\"" \ - " :ease " ease \ - " :box " box \ - " :interval " interval \ - " :due \"" due "\"" \ - " :inherited-tags \"" inherited_tags "\"" \ - " :local-tags \"" local_tags "\"" \ - ")" - } - } -} - -END { - print ")"; -} diff --git a/awk/stats_cards.awk b/awk/stats_cards.awk deleted file mode 100644 index 31a0047..0000000 --- a/awk/stats_cards.awk +++ /dev/null @@ -1,50 +0,0 @@ -BEGIN { - FS="\t"; - total = 0; - n_suspended = 0; - - t_day = time_days_ago(1); - t_week = time_days_ago(7); - t_month = time_days_ago(30); - - created["day"] = 0; - created["week"] = 0; - created["month"] = 0; -} - -{ - total += 1; - - type = $3; - by_type[type] += 1; - suspended = $4 == "1"; - - if (suspended) { - n_suspended++; - } else { - if ($5 > t_day) { - created["day"]++; - } - - if ($5 > t_week) { - created["week"]++; - } - - if ($5 > t_month) { - created["month"]++; - } - } -} - -END { - print "(" - print " :total " total; - print " :suspended " n_suspended; - print " :created-day " created["day"]; 
- print " :created-week " created["week"]; - print " :created-month " created["month"]; - for (var in by_type) { - print " :type-" var " " by_type[var]; - } - print ")" -} diff --git a/awk/stats_positions.awk b/awk/stats_positions.awk deleted file mode 100644 index c276f87..0000000 --- a/awk/stats_positions.awk +++ /dev/null @@ -1,54 +0,0 @@ -BEGIN { - FS="\t"; - total = 0; - suspended = 0; - ease = 0; - interval = 0; - box = 0; - due = 0; - now = strftime("%FT%TZ", systime(), 1); - n_stats = 0; -} - -{ - total += 1; - - type = $3; - by_type[type] += 1; - - # Don't collect ease / box / interval stats for suspended cards - if ($4 == "1") { - suspended += 1; - } else { - ease += $6; - box += $7; - interval += $8; - n_stats++; - } - - - if ($4 == "0" && $9 < now) { - due += 1; - } -} - -END { - print "(" - print " :total " total; - print " :suspended " suspended; - print " :due " due; - for (var in by_type) { - print " :type-" var " " by_type[var]; - } - - if (n_stats > 0) { - print " :avg-ease " ease / n_stats; - print " :avg-box " box / n_stats; - print " :avg-interval " interval / n_stats; - } else { - print " :avg-ease " 0.0; - print " :avg-box " 0; - print " :avg-interval " 0.0; - } - print ")" -} -- cgit v1.2.3