94 lines
3.1 KiB
Elixir
94 lines
3.1 KiB
Elixir
defmodule Outlook.HtmlPreparations.HtmlPreparation do
|
|
import Nanoid, only: [generate: 0]
|
|
|
|
alias Outlook.InternalTree.InternalNode
|
|
|
|
# Tag names that count as block-level elements. `set_sibling_with/1` marks an
# element whose name is in this list with `sibling_with: :block` and every
# other element (except "a", which it handles separately) with `:inline`.
# `img` is treated as a block element because inline images are not desirable.
|
|
@block_elements ~w(img address article aside blockquote canvas dd div dl dt fieldset figcaption figure footer form h1 h2 h3 h4 h5 h6 header hr li main nav noscript ol p pre section table tfoot ul video)
|
|
# @inline_elements ~w(a abbr acronym b bdo big br button cite code dfn em i input kbd label map object output q samp script select small span strong sub sup textarea time tt u var)
|
|
|
|
# Turns an enumerable of `{name, value}` attribute pairs into an atom-keyed map.
#
# Only whitelisted names survive: "href" and "src" keep their name, while
# "class", "style" and "src-set" are stored under "<name>-old". Pairs whose
# value is the empty string are dropped. If a name occurs more than once, the
# last pair wins.
#
# `String.to_atom/1` is only reached by names that passed the whitelist, so the
# set of atoms it can create is fixed.
#
# NOTE(review): the standard HTML attribute is spelled `srcset`; "src-set" looks
# like a typo, in which case `srcset` attributes are currently discarded — confirm.
defp clean_atts_to_map(atts) do
  kept_as_is = ~w(href src)
  renamed = ~w(class style src-set)
  allowed = kept_as_is ++ renamed

  atts
  |> Enum.filter(fn {name, value} -> name in allowed and value != "" end)
  |> Map.new(fn {name, value} ->
    key = if name in renamed, do: name <> "-old", else: name
    {String.to_atom(key), value}
  end)
end
|
|
|
|
@doc """
Converts a Floki-style HTML tree (a list of nodes) into a list of `InternalNode`
structs.

Every resulting node receives a fresh `nid` from `Nanoid.generate/0`:

  * `{tag, attributes, children}` becomes an `:element` node named `tag`; its
    attributes are reduced to the whitelisted ones and its children are
    converted recursively
  * a binary becomes a `:text` node with the binary as content
  * `{:comment, text}` becomes a `:comment` node with `text` as content

An empty list yields `[]`. No other node shape is handled, so anything else
raises a `FunctionClauseError`.
"""
def floki_to_internal(nodes) when is_list(nodes), do: Enum.map(nodes, &to_internal_node/1)

# Builds the InternalNode for one node of the parsed tree.
defp to_internal_node({tag, attributes, children}) do
  %InternalNode{
    name: tag,
    attributes: clean_atts_to_map(attributes),
    type: :element,
    nid: generate(),
    content: floki_to_internal(children)
  }
end

defp to_internal_node(text) when is_binary(text) do
  %InternalNode{type: :text, nid: generate(), content: text}
end

defp to_internal_node({:comment, comment}) do
  %InternalNode{type: :comment, nid: generate(), content: comment}
end
|
|
|
|
|
|
@doc """
Sets the `eph` field of every node in the tree to `%{sibling_with: kind}`,
replacing whatever `eph` held before.

  * nodes named `"a"` get `:both`
  * other `:element` nodes get `:block` when their name is one of the module's
    block element names, otherwise `:inline`
  * `:text` nodes get `:both` when their content is empty or whitespace-only,
    otherwise `:inline`
  * `:comment` nodes get `:both`

The content of `"a"` nodes and of elements is processed recursively. An empty
list yields `[]`.
"""
def set_sibling_with(nodes) when is_list(nodes), do: Enum.map(nodes, &mark_sibling_with/1)

# <a> may occur at block level (e.g. when enclosing an <img>), hence :both.
defp mark_sibling_with(node) when node.name == "a" do
  %InternalNode{
    node
    | eph: %{sibling_with: :both},
      content: set_sibling_with(node.content)
  }
end

defp mark_sibling_with(%{type: :element} = element) do
  level = if element.name in @block_elements, do: :block, else: :inline

  %InternalNode{
    element
    | eph: %{sibling_with: level},
      content: set_sibling_with(element.content)
  }
end

# Remaining nodes must be :text or :comment; any other type raises a
# CaseClauseError. Their content is left untouched.
defp mark_sibling_with(leaf) do
  level =
    case leaf.type do
      :text -> if leaf.content =~ ~r/^\s*$/u, do: :both, else: :inline
      :comment -> :both
    end

  %InternalNode{leaf | eph: %{sibling_with: level}}
end
|
|
|
|
|
|
@doc """
Removes every `:text` node whose content is empty or whitespace-only, at any
depth of the tree.

The content of `:element` nodes is filtered recursively. All other nodes are
kept unchanged, and the remaining nodes keep their relative order. An empty
list yields `[]`.
"""
def strip_whitespace_textnodes(nodes) when is_list(nodes) do
  Enum.flat_map(nodes, fn
    %{type: :text} = text_node ->
      # Contributing an empty list drops the node from the result.
      if text_node.content =~ ~r/^\s*$/u, do: [], else: [text_node]

    %{type: :element} = element ->
      [%InternalNode{element | content: strip_whitespace_textnodes(element.content)}]

    other ->
      [other]
  end)
end
|
|
end
|