From 9f0f553ff03629637df55555fea51963a8d046e0 Mon Sep 17 00:00:00 2001 From: Leon Rische Date: Thu, 12 Nov 2020 10:58:36 +0100 Subject: Add caching indexer prototype --- Changelog.org | 9 +++ org-fc-awk.el | 9 +++ org-fc-cache.el | 181 ++++++++++++++++++++++++++++++++++++++++++++ org-fc.el | 16 ++-- tests/org-fc-filter-test.el | 19 +++-- 5 files changed, 220 insertions(+), 14 deletions(-) create mode 100644 org-fc-cache.el diff --git a/Changelog.org b/Changelog.org index d27ebf5..87b23f9 100644 --- a/Changelog.org +++ b/Changelog.org @@ -5,6 +5,15 @@ upcoming changes. In case a update to the org sources is needed, I'll add a changelog entry with updating instructions. +** [2020-11-12 Thu] +- Removed ~(org-fc-filter-index index filter)~, + replacing it with indexer specific functions + ~(org-fc-awk-index paths &optional filter)~ + and + ~(org-fc-cache-index paths &optional filter)~ +- Added a defcustom ~org-fc-index-function~ + to support different indexers +- Added a prototype caching indexer ** [2020-11-11 Wed] *** Added The dashboard now includes a forecast of how many cards will be due diff --git a/org-fc-awk.el b/org-fc-awk.el index b0dbe11..0a3d151 100644 --- a/org-fc-awk.el +++ b/org-fc-awk.el @@ -91,6 +91,15 @@ ITAGS and LTAGS are strings `\":tag1:tag2:\"'" (plist-get file :cards))) index)) +(defun org-fc-awk-index (paths &optional filter) + "Find cards in PATHS matching an optional FILTER predicate. +FILTER can be either nil or a function taking a single card as + its input." + (let ((index (org-fc-awk-index-paths paths))) + (if filter + (cl-remove-if-not filter index) + index))) + (defun org-fc-awk-index-paths (paths) "Generate a list of all cards and positions in PATHS." (let ((output (shell-command-to-string diff --git a/org-fc-cache.el b/org-fc-cache.el new file mode 100644 index 0000000..17ab403 --- /dev/null +++ b/org-fc-cache.el @@ -0,0 +1,181 @@ +;;; org-fc-cache.el --- Cache for org-fc -*- lexical-binding: t; -*- + +;; Copyright (C) 2020 Leon Rische + +;; Author: Leon Rische +;; Url: https://www.leonrische.me/pages/org_flashcards.html +;; Package-requires: ((emacs "26.3") (org "9.3")) +;; Version: 0.0.1 + +;; This program is free software; you can redistribute it and/or modify +;; it under the terms of the GNU General Public License as published by +;; the Free Software Foundation, either version 3 of the License, or +;; (at your option) any later version. + +;; This program is distributed in the hope that it will be useful, +;; but WITHOUT ANY WARRANTY; without even the implied warranty of +;; MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +;; GNU General Public License for more details. + +;; You should have received a copy of the GNU General Public License +;; along with this program. If not, see . + +;;; Commentary: +;; +;; Even with the AWK based indexer, indexing cards before each review +;; gets slow if there are a lot of files / cards. +;; +;; After running the indexer one time, file checksums are used to +;; determine which cache entries need to be updated, assuming only a +;; small subset of the flashcard files is changed between reviews, +;; this is much faster than building the full index each time. + +;;; Code: + +(require 'org-fc) + +;;; Queue / Processing of Files + +(defvar org-fc-cache + (make-hash-table :test #'equal) + "Cache mapping filenames to card lists.") + +(defun org-fc-cache-update () + "Make sure the cache is up to date." + (let* ((hashes (org-fc-cache-hashes org-fc-directories)) + (changed + (cl-remove-if + (lambda (file) + (string= + (plist-get (gethash file org-fc-cache) :hash) + (gethash file hashes))) + (hash-table-keys hashes)))) + ;; Update changed files + (dolist (new (org-fc-awk-index-files changed)) + (let* ((path (plist-get new :path)) + (hash (gethash path hashes))) + (puthash + path + (plist-put new :hash hash) + org-fc-cache))) + ;; Remove deleted files + (dolist (file (hash-table-values org-fc-cache)) + (unless (gethash file hashes) + (remhash file org-fc-cache))))) + +;;; Filtering Entries + +(defun org-fc-cache-index (paths &optional filter) + "Find cards in PATHS matching an optional FILTER. +FILTER is assumed to be a predicate function taking a single card +as its input." + (org-fc-cache-update) + ;; Make sure paths are absolute & canonical + ;; Keys of the hash table can be assumed to be absolute & canonical. + (setq paths (mapcar #'expand-file-name paths)) + (let (res) + (maphash + (lambda (path file) + (when (cl-some (lambda (p) (string-prefix-p p path)) paths) + ;; Use push instead of `nconc' because `nconc' would break + ;; the entries of the hash table. + (if filter + (dolist (card (cl-remove-if-not filter (plist-get file :cards))) + (push (plist-put card :path path) res)) + (dolist (card (plist-get file :cards)) + (push (plist-put card :path path) res))))) + org-fc-cache) + res)) + +;; TODO: Check for awk errors +;; TODO: This should go into the awk file +(defun org-fc-awk-index-files (files) + "Generate a list of all cards and positions in FILES. +Unlike `org-fc-awk-index-paths', files are included directly in +the AWK command and directories are not supported." + (mapcar + (lambda (file) + (plist-put file :cards + (mapcar + (lambda (card) + (plist-put + card :tags + (org-fc-awk-combine-tags + (plist-get card :inherited-tags) + (plist-get card :local-tags)))) + (plist-get file :cards)))) + (read + (shell-command-to-string + (org-fc-awk--command + "awk/index.awk" + :variables (org-fc-awk--indexer-variables) + :input (mapconcat #'identity files " ")))))) + +;;; Cache Mode + +(defun org-fc-cache--enable () + "Enable org-fc-cache. +Initializes the cache and adds hooks." + (message "building org-fc cache...") + (org-fc-cache-update) + (add-hook 'org-fc-before-setup-hook #'org-fc-cache-coherence-check) + (setq org-fc-index-function #'org-fc-cache-index) + (message "org-fc cache enabled")) + +(defun org-fc-cache--disable () + "Disable org-fc-cache. +Resets the cache and removes hooks." + (setq org-fc-cache (make-hash-table :test #'equal)) + (remove-hook 'org-fc-before-setup-hook #'org-fc-cache-coherence-check) + (setq org-fc-index-function #'org-fc-awk-index) + (message "org-fc cache disabled")) + +(define-minor-mode org-fc-cache-mode + "Minor mode for caching org-fc card data. + +This mode sets up several hooks to ensure the case updated when files change, +are renamed or deleted." + :lighter "org-fc cache" + :group 'org-fc + :require 'org-fc + :global t + (if org-fc-cache-mode + (org-fc-cache--enable) + (org-fc-cache--disable))) + +;;; Coherence Check + +;; TODO: There already is a similar check in org-fc, +;; those should be combined. +(defun org-fc-cache-coherence-check () + "Check if the entry at point is coherent with its cache representation. +This is especially relevant w.r.t a card's due date / suspension state before review." + (org-fc-review-with-current-item cur + (if (org-fc-suspended-entry-p) + (error "Trying to review a suspended card")) + (let* ((position (plist-get cur :position)) + (review-data (org-fc-get-review-data)) + (row (assoc position review-data #'string=)) + (due (parse-iso8601-time-string (nth 4 row)))) + (unless (time-less-p due (current-time)) + (error "Trying to review a non-due card"))))) + +;;; Hashing + +(defun org-fc-cache-hashes (directories) + "Compute hashsums of all org files in DIRECTORIES." + (let ((output (shell-command-to-string + (org-fc-awk--pipe + (org-fc-awk--find directories) + (org-fc-awk--xargs "sha1sum")))) + (table (make-hash-table :test #'equal))) + (dolist (line (split-string output "\n" t)) + (let ((parts (split-string line " "))) + (puthash (cadr parts) (car parts) table))) + table)) + +;;; Footer + +(provide 'org-fc-cache) + +;;; org-fc-cache.el ends here diff --git a/org-fc.el b/org-fc.el index 50f8986..3cecd94 100644 --- a/org-fc.el +++ b/org-fc.el @@ -71,6 +71,11 @@ Used to generate absolute paths to the awk scripts.") :type 'boolean :group 'org-fc) +(defcustom org-fc-index-function #'org-fc-awk-index + "Function used to index cards in a list of paths." + :type 'function + :group 'org-fc) + ;;;; Org Tags / Properties (defcustom org-fc-type-property "FC_TYPE" @@ -812,12 +817,6 @@ use `(and (type double) (tag \"math\"))'." `(lambda (,card-var) ,(compile-inner filter))))) -(defun org-fc-filter-index (index filter) - "Apply FILTER to cards in INDEX." - (if filter - (cl-remove-if-not (org-fc--compile-filter filter) index) - index)) - (defun org-fc-index (context) "Create an index for review CONTEXT." (let ((paths (plist-get context :paths)) @@ -827,7 +826,10 @@ use `(and (type double) (tag \"math\"))'." ((or (null paths) (eq paths 'all)) (setq paths org-fc-directories)) ((eq paths 'buffer) (setq paths (list (buffer-file-name)))) ((stringp paths) (setq paths (list paths)))) - (org-fc-filter-index (org-fc-awk-index-paths paths) filter))) + + (if filter (setq filter (org-fc--compile-filter filter))) + + (funcall org-fc-index-function paths filter))) (defun org-fc-index-flatten-card (card) "Flatten CARD into a list of positions. diff --git a/tests/org-fc-filter-test.el b/tests/org-fc-filter-test.el index 79b962c..13c5dcd 100644 --- a/tests/org-fc-filter-test.el +++ b/tests/org-fc-filter-test.el @@ -20,6 +20,11 @@ (equal (sort ids1 #'string-lessp) (sort ids2 #'string-lessp)))) +(defun org-fc-test-filter-index (index filter) + (cl-remove-if-not + (org-fc--compile-filter filter) + index)) + (ert-deftest org-fc-filter-test () (let* ((index (org-fc-awk-index-paths (list (org-fc-test-fixture "filter/"))))) ;; Index of all cards @@ -30,31 +35,31 @@ ;; Filter by type (should (org-fc-test-compare-ids - (org-fc-filter-index index '(type double)) + (org-fc-test-filter-index index '(type double)) '(a-double c-double))) ;; Filter by type, or (should (org-fc-test-compare-ids - (org-fc-filter-index index '(or (type cloze) (type double))) + (org-fc-test-filter-index index '(or (type cloze) (type double))) '(a-double c-double c-cloze))) ;; Filter by tag, direct (should (org-fc-test-compare-ids - (org-fc-filter-index index '(tag "tag1")) + (org-fc-test-filter-index index '(tag "tag1")) '(a-normal a-double))) ;; Filter by tag, inherited (should (org-fc-test-compare-ids - (org-fc-filter-index index '(tag "tag2")) + (org-fc-test-filter-index index '(tag "tag2")) '(a-double b-normal1))) ;; Filter by tag, filetag (should (org-fc-test-compare-ids - (org-fc-filter-index index '(and (tag "file1") + (org-fc-test-filter-index index '(and (tag "file1") (tag "file2") (tag "file3"))) '(c-double c-cloze))) @@ -62,12 +67,12 @@ ;; Negation (should (org-fc-test-compare-ids - (org-fc-filter-index index '(not (type normal))) + (org-fc-test-filter-index index '(not (type normal))) '(a-double c-double c-cloze))) ;; Combined (should (org-fc-test-compare-ids - (org-fc-filter-index index '(and (not (type normal)) + (org-fc-test-filter-index index '(and (not (type normal)) (tag "file1"))) '(c-double c-cloze))))) -- cgit v1.2.3