Remove the HTML parsing stuff from quip

This commit is contained in:
John Doty 2017-10-08 06:25:02 -07:00
parent beac96bb59
commit bcf17ca9ad

View file

@ -148,81 +148,5 @@ indicates how the content is to be interpreted."
(section_id . ,section)
(thread_id . ,thread))))
;;; Content parsing functions
(defun quip-get-item-type (item)
  "Classify the specified HTML ITEM.

ITEM is a parsed HTML node of the form (TAG ATTRIBUTES . CHILDREN).
Return a symbol naming the kind of content the node holds:

  p -> `paragraph'          h1, h2, h3 -> `h1', `h2', `h3'
  blockquote -> `block-quote'   q -> `pull-quote'
  pre -> `code-block'       li -> `list-item'
  span -> `span'

A div is classified by its first child: `ul' or `ol' when that
child is such a list element, and `unrecognized-inner' otherwise,
including when the first child is a text node or is missing.
Any other tag yields `unrecognized'."
  (let* ((tag (car item))
         (simple (assq tag '((p . paragraph)
                             (h1 . h1)
                             (h2 . h2)
                             (h3 . h3)
                             (blockquote . block-quote)
                             (q . pull-quote)
                             (pre . code-block)
                             (li . list-item)
                             (span . span)))))
    (cond
     (simple (cdr simple))
     ((eq tag 'div)
      ;; `car-safe' rather than `car': the first child of a div can be
      ;; a text node (a string), which must not signal an error here.
      (let ((inner-tag (car-safe (cl-caddr item))))
        (if (memq inner-tag '(ul ol))
            inner-tag
          'unrecognized-inner)))
     (t 'unrecognized))))
(defun quip-get-item-id (item type)
  "Return the value of the `id' attribute for ITEM, interpreted per TYPE.

When TYPE is `ul' or `ol' the id is read from the attributes of
ITEM's first child; for any other TYPE it is read from ITEM's own
attributes.  Return nil when no `id' attribute is present."
  (let ((node (if (memq type '(ul ol))
                  (cl-caddr item)       ; Lists keep their id on the nested element.
                item)))
    (assoc-default 'id (cadr node))))
(defun quip-get-item-content (item type)
  "Extract the content from the provided ITEM given its TYPE.

For the list types `ul' and `ol' the list element is the first
child of ITEM; each of its element children is converted with
`quip-get-item-from-element' and the resulting list is returned.
Text nodes (strings) among those children are skipped, in the
same way `quip-parse-html-content' skips them at top level.

For every other TYPE the first child of ITEM is returned as is."
  (if (memq type '(ul ol))
      (let* ((inner (cl-caddr item))
             ;; Drop text nodes: the element converter expects a
             ;; (TAG ATTRIBUTES . CHILDREN) list, not a string.
             (inner-elems (cl-remove-if #'stringp (cddr inner))))
        (mapcar #'quip-get-item-from-element inner-elems))
    (cl-caddr item)))
(cl-defstruct quip-item
  "A single content item extracted from parsed HTML."
  type                                  ; Classification symbol for the item.
  id                                    ; Value of the item's `id' attribute.
  content)                              ; Extracted content of the item.
(defun quip-get-item-from-element (element)
  "Build a `quip-item' describing the parsed HTML ELEMENT.

The item's type is determined by `quip-get-item-type'; its id and
content are then extracted according to that type with
`quip-get-item-id' and `quip-get-item-content'."
  (let ((kind (quip-get-item-type element)))
    (make-quip-item
     :type kind
     :id (quip-get-item-id element kind)
     :content (quip-get-item-content element kind))))
(defun quip-parse-html-content (html)
  "Parse the HTML string into a list of `quip-item' structures.

HTML is inserted into a temporary buffer and parsed with
`libxml-parse-html-region'.  Each element child of the document
body is converted with `quip-get-item-from-element'; text nodes
\(strings) directly under the body are skipped.  Return nil when
the parse yields no elements."
  (with-temp-buffer
    (insert html)
    (let* ((parsed-html (libxml-parse-html-region (point-min) (point-max)))
           ;; Find the body by tag instead of assuming it is the first
           ;; child of the root, since the parser may emit other nodes
           ;; (such as a `head') before it.  Fall back to the first
           ;; child when no body node is present.
           (body (or (assq 'body (cddr parsed-html))
                     (cl-caddr parsed-html)))
           (html-items (cl-remove-if #'stringp (cddr body))))
      (mapcar #'quip-get-item-from-element html-items))))
;; (prin1
;; (quip-parse-html-content
;; (assoc-default 'html (quip-get-thread "idflAWG6R6Uu"))))
;; Register the `quip' feature so that (require 'quip) sees this library as loaded.
(provide 'quip)
;;; quip.el ends here