aboutsummaryrefslogtreecommitdiff
path: root/Lib/fontTools/subset/svg.py
diff options
context:
space:
mode:
Diffstat (limited to 'Lib/fontTools/subset/svg.py')
-rw-r--r--Lib/fontTools/subset/svg.py248
1 files changed, 248 insertions, 0 deletions
diff --git a/Lib/fontTools/subset/svg.py b/Lib/fontTools/subset/svg.py
new file mode 100644
index 00000000..e25fb3e6
--- /dev/null
+++ b/Lib/fontTools/subset/svg.py
@@ -0,0 +1,248 @@
+from __future__ import annotations
+
+import re
+from functools import lru_cache
+from itertools import chain, count
+from typing import Dict, Iterable, Iterator, List, Optional, Set, Tuple
+
+try:
+ from lxml import etree
+except ModuleNotFoundError:
+ # lxml is required for subsetting SVG, but we prefer to delay the import error
+ # until subset_glyphs() is called (i.e. if font to subset has an 'SVG ' table)
+ etree = None
+
+from fontTools import ttLib
+from fontTools.subset.util import _add_method
+
+
+__all__ = ["subset_glyphs"]
+
+
# Matches element ids of the form "glyph<decimal digits>"; group 1 captures the
# digits (the glyph index as a string).
GID_RE = re.compile(r"^glyph(\d+)$")

# XPath prefix -> namespace URI, passed to every compiled XPath expression.
NAMESPACES = dict(
    svg="http://www.w3.org/2000/svg",
    xlink="http://www.w3.org/1999/xlink",
)
# Fully-qualified "{namespace-uri}href" key used to look up xlink:href in el.attrib.
XLINK_HREF = "{" + NAMESPACES["xlink"] + "}href"
+
+
# TODO(anthrotype): Replace with functools.cache once we are 3.9+
@lru_cache(maxsize=None)
def xpath(path):
    # Compile the XPath expression once per distinct `path` string and memoize it
    # (unbounded cache), so the same compiled evaluator can be applied to many
    # elements. Prefixes used in `path` are resolved through NAMESPACES.
    compiled = etree.XPath(path, namespaces=NAMESPACES)
    return compiled
+
+
def group_elements_by_id(tree: etree.Element) -> Dict[str, etree.Element]:
    # Build an {id: element} lookup for every svg-namespaced element that has an
    # 'id' attribute. The absolute '//' path searches the whole document, so the
    # root element itself is included:
    # https://github.com/fonttools/fonttools/issues/2548
    # If the same id occurs more than once, the last element found wins.
    find_elements_with_id = xpath("//svg:*[@id]")
    by_id = {}
    for node in find_elements_with_id(tree):
        by_id[node.attrib["id"]] = node
    return by_id
+
+
def parse_css_declarations(style_attr: str) -> Dict[str, str]:
    # Parse the content of an SVG 'style' attribute into a {property: value} dict.
    # https://developer.mozilla.org/en-US/docs/Web/SVG/Attribute/style
    # https://developer.mozilla.org/en-US/docs/Web/CSS/Syntax#css_declarations
    #
    # Declarations are separated by ';'; empty/blank declarations are ignored.
    # Property names and values are stripped of surrounding whitespace; when a
    # property is repeated the last occurrence wins.
    # Raises ValueError for a non-blank declaration that contains no ':'.
    #
    # NOTE: this is a deliberately simple splitter, not a CSS tokenizer: a ';'
    # inside a quoted string or url(...) still splits the declaration.
    result = {}
    for declaration in style_attr.split(";"):
        # Split on the FIRST colon only: a property name never contains one, but
        # a value legitimately may, e.g. "fill:url(http://example.com/p.svg#a)".
        property_name, colon, value = declaration.partition(":")
        if colon:
            result[property_name.strip()] = value.strip()
        elif declaration.strip():
            raise ValueError(f"Invalid CSS declaration syntax: {declaration}")
    return result
+
+
def iter_referenced_ids(tree: etree.Element) -> Iterator[str]:
    # Yield all the ids that can be reached via references from this element tree
    # (the element itself plus its svg descendants). The same id may be yielded
    # more than once; callers are expected to collect the result into a set.
    # We currently support xlink:href (as used by <use> and gradient templates),
    # and local url(#...) links found in fill or clip-path, given either as
    # attributes or as declarations inside the 'style' attribute.
    # Raises ValueError (from parse_css_declarations) if a visited element has a
    # malformed 'style' attribute.
    # TODO(anthrotype): Check we aren't missing other supported kinds of reference
    find_svg_elements_with_references = xpath(
        ".//svg:*[ "
        "starts-with(@xlink:href, '#') "
        "or starts-with(@fill, 'url(#') "
        "or starts-with(@clip-path, 'url(#') "
        # Match 'url(#' anywhere in the style: requiring ':url(#' would miss the
        # common spelling with whitespace after the colon ("fill: url(#a)").
        # Over-selection is harmless, the loop below re-checks each property.
        "or contains(@style, 'url(#') "
        "]",
    )
    for el in chain([tree], find_svg_elements_with_references(tree)):
        ref_id = href_local_target(el)
        if ref_id is not None:
            yield ref_id

        attrs = el.attrib
        if "style" in attrs:
            # CSS declarations take precedence over same-named attributes
            attrs = {**attrs, **parse_css_declarations(attrs["style"])}
        for attr in ("fill", "clip-path"):
            if attr not in attrs:
                continue
            value = attrs[attr]
            if value.startswith("url(#") and value.endswith(")"):
                ref_id = value[5:-1]
                # An empty target ("url(#)") points at nothing; skip it rather
                # than fail, since validating the svg is not our job.
                if ref_id:
                    yield ref_id
+
+
def closure_element_ids(
    elements: Dict[str, etree.Element], element_ids: Set[str]
) -> None:
    # Grow `element_ids` IN PLACE until it also contains every id reachable,
    # directly or transitively, through references from the elements it names.
    # `elements` maps id -> element; ids without an entry are left in the set
    # but are not followed.
    frontier = element_ids
    while frontier:
        discovered: Set[str] = set()
        for current_id in frontier:
            # a dangling reference has no element to inspect; not our job to
            # validate svg, so it is simply not expanded
            if current_id in elements:
                discovered.update(iter_referenced_ids(elements[current_id]))
        # only ids never seen before need another round
        discovered.difference_update(element_ids)
        element_ids.update(discovered)
        frontier = discovered
+
+
def subset_elements(el: etree.Element, retained_ids: Set[str]) -> bool:
    # Prune the tree rooted at `el` in place and report whether `el` survives.
    # An element is kept when its own id is retained (then its whole subtree is
    # kept untouched), or when at least one of its children is kept.
    # Otherwise it is detached from its parent (if it has one) and False is
    # returned.
    if el.attrib.get("id") in retained_ids:
        # id is in the set: keep the whole subtree, no need to recurse
        return True

    # Visit EVERY child, even after one is known to be kept: the recursive call
    # has the side effect of dropping unreferenced elements, so we must not
    # short-circuit.
    any_child_kept = False
    for child in el:
        if subset_elements(child, retained_ids):
            any_child_kept = True
    if any_child_kept:
        return True

    # every child reported False, so each one removed itself from `el`
    assert len(el) == 0
    parent = el.getparent()
    if parent is not None:
        parent.remove(el)
    return False
+
+
def remap_glyph_ids(
    svg: etree.Element, glyph_index_map: Dict[int, int]
) -> Dict[str, str]:
    # Given an {old_gid: new_gid} map, rewrite in place the 'id' of every element
    # whose id is "glyph{old_gid}" and return the {old_id: new_id} renames that
    # were applied. Elements whose glyph index does not change are left alone
    # and do not appear in the returned map.
    by_id = group_elements_by_id(svg)
    renamed = {}
    for old_id, node in by_id.items():
        match = GID_RE.match(old_id)
        if match is None:
            # not a glyph element
            continue

        old_gid = int(match.group(1))
        new_gid = glyph_index_map.get(old_gid)
        if new_gid is None:
            # The old index is missing from the map: the element corresponds to a
            # glyph that was excluded from the font's subset. Rename it so that it
            # can't clash with the new GIDs or with any other pre-existing id.
            replacement = f".{old_id}"
            suffix = count(1)
            while replacement in by_id:
                # each retry appends a further ".N" to the previous candidate
                replacement = f"{replacement}.{next(suffix)}"
        elif new_gid == old_gid:
            continue
        else:
            replacement = f"glyph{new_gid}"

        renamed[old_id] = replacement
        node.attrib["id"] = replacement

    return renamed
+
+
def href_local_target(el: etree.Element) -> Optional[str]:
    # Return the id targeted by the element's xlink:href when it is a local
    # "#id" reference; return None if the attribute is absent, is not local,
    # or is a bare "#".
    attributes = el.attrib
    if XLINK_HREF not in attributes:
        return None
    href = attributes[XLINK_HREF]
    is_local_reference = href.startswith("#") and len(href) > 1
    # drop the leading '#'
    return href[1:] if is_local_reference else None
+
+
def update_glyph_href_links(svg: etree.Element, id_map: Dict[str, str]) -> None:
    # Rewrite in place every xlink:href="#glyph..." found on svg descendants of
    # `svg` so that it points to the renamed id given by `id_map` ({old: new}).
    # Links whose target is not a key of `id_map` are left unchanged.
    find_glyph_links = xpath(".//svg:*[starts-with(@xlink:href, '#glyph')]")
    for link in find_glyph_links(svg):
        target_id = href_local_target(link)
        # the XPath above only selects hrefs that start with '#glyph'
        assert target_id is not None
        if target_id not in id_map:
            continue
        link.attrib[XLINK_HREF] = f"#{id_map[target_id]}"
+
+
def ranges(ints: Iterable[int]) -> Iterator[Tuple[int, int]]:
    # Collapse the integers into sorted, non-overlapping, inclusive (min, max)
    # runs of consecutive values. Duplicates are ignored; an empty input yields
    # nothing.
    ordered = sorted(set(ints))
    if not ordered:
        return
    low = high = ordered[0]
    for value in ordered[1:]:
        if value == high + 1:
            # still consecutive: extend the current run
            high = value
            continue
        # gap found: emit the finished run and start a new one
        yield (low, high)
        low = high = value
    yield (low, high)
+
+
@_add_method(ttLib.getTableClass("SVG "))
def subset_glyphs(self, s) -> bool:
    # Subset the 'SVG ' table in place: every (document, start, end) record in
    # self.docList is reduced to the glyphs listed in `s.glyphs`, and records
    # left without any retained glyph are dropped.
    # Returns True if at least one document record remains.
    # Raises ModuleNotFoundError if lxml could not be imported.
    if etree is None:
        raise ModuleNotFoundError("No module named 'lxml', required to subset SVG")

    # glyph names (before subsetting)
    glyph_order: List[str] = s.orig_glyph_order
    # map from glyph names to original glyph indices
    rev_orig_glyph_map: Dict[str, int] = s.reverseOrigGlyphMap
    # map from original to new glyph indices (after subsetting)
    glyph_index_map: Dict[int, int] = s.glyph_index_map

    subsetted_docs: List[Tuple[str, int, int]] = []
    for doc, first_gid, last_gid in self.docList:
        # names of the glyphs covered by this record's inclusive GID range
        covered_names = {glyph_order[i] for i in range(first_gid, last_gid + 1)}
        retained_names = covered_names.intersection(s.glyphs)
        if not retained_names:
            # no intersection: we can drop the whole record
            continue

        # encode because fromstring dislikes xml encoding decl if input is str.
        # SVG xml encoding must be utf-8 as per OT spec.
        xml_bytes = doc.encode("utf-8")
        parser = etree.XMLParser(
            # Disable libxml2 security restrictions to support very deep trees.
            # Without this we would get an error like this:
            # `lxml.etree.XMLSyntaxError: internal error: Huge input lookup`
            # when parsing big fonts e.g. noto-emoji-picosvg.ttf.
            huge_tree=True,
            # ignore blank text as it's not meaningful in OT-SVG; it also prevents
            # dangling tail text after removing an element when pretty_print=True
            remove_blank_text=True,
        )
        svg = etree.fromstring(xml_bytes, parser=parser)

        # ids of the retained glyph elements, plus everything they reference
        by_id = group_elements_by_id(svg)
        old_gids = {rev_orig_glyph_map[name] for name in retained_names}
        wanted_ids = {f"glyph{gid}" for gid in old_gids}
        closure_element_ids(by_id, wanted_ids)

        if not subset_elements(svg, wanted_ids):
            # nothing in the document survived the pruning
            continue

        if not s.options.retain_gids:
            # glyph indices change: rename the elements, then fix links to them
            renamed = remap_glyph_ids(svg, glyph_index_map)
            update_glyph_href_links(svg, renamed)

        serialized = etree.tostring(svg, pretty_print=s.options.pretty_svg)
        new_doc = serialized.decode("utf-8")

        # one record per run of consecutive new glyph indices, all sharing new_doc
        new_gids = (glyph_index_map[gid] for gid in old_gids)
        for run_start, run_end in ranges(new_gids):
            subsetted_docs.append((new_doc, run_start, run_end))

    self.docList = subsetted_docs

    return bool(self.docList)