Using :eph from now on to store ephemeral data such as :sibling_with. Additionally, :eph is now cleaned up before saving to the db, and InternalTree.Basic was renamed to InternalTree.RawInternalBasic to make clear that it contains functions for an intermediary tree structure.
102 lines
3.3 KiB
Elixir
102 lines
3.3 KiB
Elixir
defmodule Outlook.HtmlPreparations.HtmlPreparation do
|
|
import Ecto.UUID, only: [generate: 0]
|
|
|
|
alias Outlook.InternalTree.InternalNode
|
|
|
|
# Tag names treated as block-level. set_sibling_with/1 marks an element with
# `sibling_with: :block` when its name is in this list and `:inline` otherwise.
@block_elements ["address","article","aside","blockquote","canvas","dd","div","dl","dt","fieldset","figcaption","figure","footer","form","h1","h2","h3","h4","h5","h6","header","hr","li","main","nav","noscript","ol","p","pre","section","table","tfoot","ul","video"]
|
|
# @inline_elements ["a","abbr","acronym","b","bdo","big","br","button","cite","code","dfn","em","i","img","input","kbd","label","map","object","output","q","samp","script","select","small","span","strong","sub","sup","textarea","time","tt","u","var"]
|
|
|
|
# Reduces an enumerable of `{name, value}` attribute pairs to a map with atom keys.
#
#   * "href" and "src" are kept under the keys :href and :src.
#   * "class", "style" and "src-set" are kept under a key suffixed with "-old"
#     (:"class-old", :"style-old", :"src-set-old").
#   * Every other attribute, and every attribute whose value is "", is dropped.
#
# When a name occurs more than once, the last occurrence wins.
defp clean_atts_to_map(atts) do
  # Static lookup table: the resulting atoms are literals, so nothing is ever
  # converted to an atom at runtime.
  # NOTE(review): the HTML attribute is spelled "srcset"; "src-set" presumably
  # never matches parsed HTML. Left unchanged — confirm the intended name.
  key_for = %{
    "href" => :href,
    "src" => :src,
    "class" => :"class-old",
    "style" => :"style-old",
    "src-set" => :"src-set-old"
  }

  atts
  |> Enum.flat_map(fn {name, value} ->
    case key_for do
      %{^name => key} when value != "" -> [{key, value}]
      _ -> []
    end
  end)
  |> Map.new()
end
|
|
|
|
@doc """
Converts a list of parsed HTML nodes into a list of `%InternalNode{}` structs.

Three node shapes are recognised:

  * `{tag, attributes, content}` becomes an `:element` node; its attributes go
    through `clean_atts_to_map/1` and its content is converted recursively.
  * a binary becomes a `:text` node holding that binary as content.
  * `{:comment, comment}` becomes a `:comment` node holding the comment as content.

Every node receives a newly generated UUID. An empty list yields an empty list.
"""
def floki_to_internal([]), do: []

def floki_to_internal([{tag, attributes, children} | siblings]) do
  element = %InternalNode{
    name: tag,
    attributes: clean_atts_to_map(attributes),
    type: :element,
    uuid: generate(),
    content: floki_to_internal(children)
  }

  [element | floki_to_internal(siblings)]
end

def floki_to_internal([text | siblings]) when is_binary(text) do
  text_node = %InternalNode{type: :text, uuid: generate(), content: text}
  [text_node | floki_to_internal(siblings)]
end

def floki_to_internal([{:comment, text} | siblings]) do
  comment_node = %InternalNode{type: :comment, uuid: generate(), content: text}
  [comment_node | floki_to_internal(siblings)]
end
|
|
|
|
|
|
@doc """
Sets `eph` to `%{sibling_with: kind}` on every node of the tree, recursing into
the content of element nodes. Any previous `eph` value is replaced.

`kind` is determined as follows:

  * element: `:block` if its name is in `@block_elements`, otherwise `:inline`
  * text: `:both` if the content consists only of whitespace (or is empty),
    otherwise `:inline`
  * comment: `:both`
"""
def set_sibling_with([]), do: []

def set_sibling_with([%{type: :element} = element | siblings]) do
  kind = if element.name in @block_elements, do: :block, else: :inline

  annotated = %InternalNode{
    element
    | eph: %{sibling_with: kind},
      content: set_sibling_with(element.content)
  }

  [annotated | set_sibling_with(siblings)]
end

def set_sibling_with([leaf | siblings]) do
  kind =
    case leaf.type do
      :text ->
        if Regex.match?(~r/^\s*$/, leaf.content), do: :both, else: :inline

      :comment ->
        :both
    end

  annotated = %InternalNode{leaf | eph: %{sibling_with: kind}}
  [annotated | set_sibling_with(siblings)]
end
|
|
|
|
|
|
@doc """
Removes text nodes whose content is empty or whitespace-only from the tree.

Element nodes are kept and their content is filtered recursively; all other
nodes (and text nodes with visible content) are passed through unchanged.
"""
def strip_whitespace_textnodes([]), do: []

def strip_whitespace_textnodes([%{type: :text} = text | siblings]) do
  blank? = Regex.match?(~r/^\s*$/, text.content)
  remaining = strip_whitespace_textnodes(siblings)

  if blank?, do: remaining, else: [text | remaining]
end

def strip_whitespace_textnodes([%{type: :element} = element | siblings]) do
  cleaned = %InternalNode{element | content: strip_whitespace_textnodes(element.content)}
  [cleaned | strip_whitespace_textnodes(siblings)]
end

def strip_whitespace_textnodes([other | siblings]) do
  [other | strip_whitespace_textnodes(siblings)]
end
|
|
|
|
|
|
@doc """
Flattens a node tree into a depth-first list of `%{node: node, level: level}` maps.

`level` is the nesting depth assigned to the nodes of the given list; the
content of an element is listed directly after the element with `level + 1`.
Element nodes are emitted with their `:content` replaced by `[]`, since their
children appear as separate entries. Non-element nodes are emitted unchanged.

Returns `[]` for an empty list.
"""
@spec build_indentation_list(list(), integer()) :: [%{node: map(), level: integer()}]
def build_indentation_list([], _level), do: []

def build_indentation_list([%{type: :element} = node | rest], level) do
  entry = %{node: %{node | content: []}, level: level}
  children = build_indentation_list(node.content, level + 1)

  # Both recursive results are already flat lists, so a single concatenation is
  # enough; flattening the whole accumulated result at every element is not needed.
  [entry | children ++ build_indentation_list(rest, level)]
end

def build_indentation_list([node | rest], level) do
  [%{node: node, level: level} | build_indentation_list(rest, level)]
end
|
|
end
|