diff options
-rw-r--r-- | epub.py | 105 |
1 files changed, 67 insertions, 38 deletions
@@ -87,45 +87,74 @@ class Page: CONTENT = "content" @staticmethod - def _generate_inner(root, content): + def _generate_inner(root, stuff): static = {} - for elem in content: - tstack = [] - while not issubclass(type(elem), str): - tag, elem = elem - if not tag: - break - tstack.append(tag) - if not tstack: - tstack.append("p") - - tag = root - for t in tstack: - if not isinstance(t, str): - t, attrib = t - else: - attrib = {} - - if t == "img" and "src" in attrib: ## Store the src - ext = os.path.splitext(attrib["src"])[1] - h = hash_file(attrib["src"]) - if ext: - h += ext - attrib = attrib.copy() - static["OEBPS/Static/"+h] = attrib["src"] - attrib["src"] = "../Static/"+h - elif ("src" in attrib and os.path.isfile(attrib["src"])) or ("href" in attrib and os.path.isfile(attrib["href"])): - raise ValueError("Unknown tag %s for href/src" % t) - - tag = etree.SubElement(tag, t, attrib) - if not isinstance(elem, str): - st = Page._generate_inner(tag, elem) - static.update(st) - else: - tag.text = elem + if isinstance(stuff, str): + tag, elem = None, stuff + else: + tag, elem = stuff + + if not isinstance(tag, str) and tag is not None: + tag, attrib = tag + else: + attrib = {} - return static + ## There are three cases we need to handle + ## 1. Text node, in which case we return the text + ## 2. Regular node, in which case we create the node and return + ## 3. "" node, in which case we iterate over the list + + if tag is None: ## Text node + if not isinstance(elem, str): + raise ValueError("Text node must be most nested tag") + return elem, {} + elif tag: ## Regular node + ## We need to do a bit more work for images/links + if tag == "img" and "src" in attrib: ## Store image src and redirect it + ext = os.path.splitext(attrib["src"])[1] + h = hash_file(attrib["src"]) + if ext: + h += ext + attrib = attrib.copy() + static["OEBPS/Static/"+h] = attrib["src"] + attrib["src"] = "../Static/"+h + elif ("src" in attrib and os.path.isfile(attrib["src"])) or ("href" in attrib and not attrib["href"].startswith('#') and os.path.isfile(attrib["href"])): + raise ValueError("Unknown tag %s for href/src" % tag) + + this = etree.SubElement(root, tag, attrib) + + rest, new_static = Page._generate_inner(this, elem) + static.update(new_static) + + if isinstance(rest, str): ## It was some text + this.text = rest + + return this, static + else: ## List node + ## For text nodes, we need to keep track of the last element so we can put the text in + ## its tail, if necessary + prev_iter = None + if root.text is None: + root.text = "" + for other in elem: + rest, new_static = Page._generate_inner(root, other) + static.update(new_static) + if isinstance(rest, str): ## Text to add + if prev_iter is not None: ## Add it to the tail + if prev_iter.tail is None: + prev_iter.tail = "" + if prev_iter.tail: + prev_iter.tail += ' ' + prev_iter.tail += rest + else: + if root.text: + root.text += ' ' + root.text += rest + else: ## Otherwise, we update the last element to point to this one + prev_iter = rest + + return root, static def generate_xhtml(self): """Generate the XHTML representation of this page. @@ -147,7 +176,7 @@ class Page: etree.SubElement(head, "link", {"href": "../Styles/page-template.xpgt", "rel": "stylesheet", "type": "application/vnd.adobe-page-template+xml"}) div = etree.SubElement(etree.SubElement(chap, "body"), "div") - static = self._generate_inner(div, self.content) + _, static = self._generate_inner(div, ("", self.content)) return (chap, {"doctype": '<!DOCTYPE html>', "standalone": False}, static) @@ -167,7 +196,7 @@ class BasicTOCPage(Page): if p.type != Page.CONTENT or p.fname is not None: self.toc_ol.append(("li", (("a", {"href": fname}), p.title))) else: - self.toc_ol.append(("li", (("a", {"href": fname}), "{num}. {header}".format(num=self.chapter_count, header=p.title)))) + self.toc_ol.append(("li", (("a", {"href": fname}), "{header}".format(num=self.chapter_count, header=p.title)))) self.chapter_count += 1 def clear(self): |