A large step back and then a small step forward.

This commit is contained in:
John Doty 2017-10-16 22:01:36 -07:00
parent 15fb23dc27
commit 00a4b3e583

View file

@ -1,4 +1,4 @@
;;; ox-quip.el -- Publish from org-mode to Quip. ;;; ox-quip.el -- Publish from org-mode to Quip. -*- lexical-binding: t -*-
;;; Commentary: ;;; Commentary:
;; Publisher from org-mode to Quip. (Export as markdown, push as a new ;; Publisher from org-mode to Quip. (Export as markdown, push as a new
@ -31,24 +31,17 @@ compare properly."
(mapcar #'org-quip--clean-element-tree (mapcar #'org-quip--clean-element-tree
(cl-remove-if 'org-quip--remove-element-p (cddr elem)))) (cl-remove-if 'org-quip--remove-element-p (cddr elem))))
(defun org-quip--clean-element-tree (elem) (defun org-quip--clean-string (string tag)
"Clean up an HTML element ELEM, removing contents that are empty strings." "Clean up the text in the STRING contained in TAG, to match good text."
(cond (let ((string string))
((listp elem) (setq string (replace-regexp-in-string (char-to-string 160) " " string))
(append (list (first elem) (second elem)) (setq string (replace-regexp-in-string (char-to-string #x200B) "" string))
(org-quip--clean-element-contents (cddr elem)))) (unless (eq 'pre tag)
(setq string (replace-regexp-in-string "[[:space:]]+" " " string)))
string))
((stringp elem) (defun org-quip--clean-element-contents (contents tag)
(let ((elem elem)) "Do some shit with CONTENTS of TAG.
(setq elem (replace-regexp-in-string (char-to-string 160) " " elem))
(setq elem (replace-regexp-in-string (char-to-string #x200B) " " elem))
(setq elem (replace-regexp-in-string "[[:space:]]+" " " elem))
elem))
(t elem)))
(defun org-quip--flatten-tree (dom)
"Clean up the DOM by flattening child elements.
We extract and flatten children from wrapper elements like We extract and flatten children from wrapper elements like
<html>, <body>, some <divs>, &c. The goal is to be able to run <html>, <body>, some <divs>, &c. The goal is to be able to run
@ -59,35 +52,49 @@ comparable.
\(The specific org export we're dealing with is \(The specific org export we're dealing with is
'org-quip--export-html'.)" 'org-quip--export-html'.)"
(cond (cond
;; If the thing we're looking at is an atom of some sort (string, ; (base case)
;; whatever) then we just wrap it in a list. ((null contents) nil)
((not (listp dom))
(list dom))
;; If the thing we're looking at it one of the kinds of elements that we ; Join strings together until we're
;; want to unwrap, then join the lists containing the result of recursing ; done. But we're not perfect.
;; on this element's children. ((and (stringp (car contents)) (stringp (cadr contents)))
;; (org-quip--clean-element-contents (cons (concat (car contents)
;; These are verboten automatically. (cadr contents))
((or (member (first dom) '(div html body span br)) (cddr contents))
;; No anchors without hrefs. tag))
(and (eq (first dom) 'a)
(second dom)
(not (alist-get 'href (second dom))))
)
(apply #'append (mapcar #'org-quip--flatten-tree (cddr dom))))
;; Otherwise: the thing we're looking at is an element we'd like to ; We're done joining strings, clean
;; keep. Keep the tag and attributes and have the children be the ; this one up, and move on with the
;; flattened result of recursing on the original element's children. ; rest of the work.
;; ((stringp (car contents))
;; (It's super confusing because we have to return this element in a list, (cons (org-quip--clean-string (car contents) tag)
;; which is itself a list...) (org-quip--clean-element-contents (cdr contents) tag)))
; We're looking at an element; is
; this an element we're unfolding?
((or (member (caar contents) '(div html body span br))
(and (eq (caar contents) 'a)
(cdar contents)
(not (alist-get 'href (cdar contents)))))
(org-quip--clean-element-contents (append (cddar contents) (cdr contents))
tag))
; OK, we're keeping this element.
; Clean it up and stick it in the
; list.
(t (t
(letrec ((children (cddr dom)) (letrec ((new-tag (caar contents))
(flat-children (apply #'append (attribs (when (member new-tag '(h1 h2 h3 h4 h5 h6))
(mapcar #'org-quip--flatten-tree children)))) (list
(list (append (list (first dom) (second dom) flat-children))))))) (cons 'id
(org-quip--get-element-id (car contents))))))
(body (org-quip--clean-element-contents (cddar contents) new-tag))
(new-elem (append (list new-tag attribs) body)))
(cons new-elem
(org-quip--clean-element-contents (cdr contents) tag))))
))
(defun org-quip--get-cleaned-dom (html) (defun org-quip--get-cleaned-dom (html)
"Clean HTML as a list of cleaned DOM elements. "Clean HTML as a list of cleaned DOM elements.
@ -98,9 +105,8 @@ results of the quip exporter and the HTML we get back from quip
to produce comparable results." to produce comparable results."
(with-temp-buffer (with-temp-buffer
(insert html) (insert html)
(letrec ((parsed-html (libxml-parse-html-region (point-min) (point-max))) (let ((parsed-html (libxml-parse-html-region (point-min) (point-max))))
(cleaned-html (org-quip--clean-element-tree parsed-html))) (org-quip--clean-element-contents (list parsed-html) nil))))
(org-quip--flatten-tree cleaned-html))))
;; ============================================== ;; ==============================================
;; Functions to do with mapping HTML and ORG IDs. ;; Functions to do with mapping HTML and ORG IDs.
@ -389,36 +395,6 @@ This replaces what's in the buffer so I hope you're OK with that."
))) )))
(defun org-quip--merge-contents (list)
"Concatenate adjacent strings in LIST."
(let ((current (car list))
(next (car (cdr list)))
(rest (cdr (cdr list))))
(cond
((null current) nil)
((and (stringp current) (stringp next))
(jd/merge-it (cons (concat current " " next) rest)))
(t
(cons current (org-quip--merge-contents (cdr list)))))))
(defun org-quip--get-squeaky-dom (dom)
"Clean up DOM even further, for the purposes of diffing."
(cond
((stringp dom) (string-trim dom))
((member (first dom) '(h1 h2 h3 h4 h5 h6))
(list (first dom)
(list (cons 'id (org-quip--get-element-id dom)))
(mapcar #'org-quip--get-squeaky-dom (cddr dom))))
((symbolp (first dom))
(list (first dom)
nil
(org-quip--merge-contents
(mapcar #'org-quip--get-squeaky-dom (cddr dom)))))
(t (org-quip--merge-contents (mapcar #'org-quip--get-squeaky-dom dom)))))
(defun org-quip--get-cleaned-html-string (html) (defun org-quip--get-cleaned-html-string (html)
"Clean HTML as a nice string for diffing." "Clean HTML as a nice string for diffing."
(with-temp-buffer (with-temp-buffer
@ -426,8 +402,7 @@ This replaces what's in the buffer so I hope you're OK with that."
(lambda (elem) (lambda (elem)
(prin1 elem (current-buffer)) (prin1 elem (current-buffer))
(insert "\n")) (insert "\n"))
(org-quip--get-squeaky-dom (org-quip--get-cleaned-dom html))) (org-quip--get-cleaned-dom html))
(buffer-string))) (buffer-string)))
(defun org-quip--test-blarg () (defun org-quip--test-blarg ()
@ -466,8 +441,6 @@ This replaces what's in the buffer so I hope you're OK with that."
(display-buffer (current-buffer)))) (display-buffer (current-buffer))))
(org-quip--merge-contents '("a" "b" 1 "c"))
(third '(1)) (third '(1))
) )