Update partitioning to strip empty elements

This commit is contained in:
Thelonius Kort
2023-01-12 22:48:33 +01:00
parent e45e3597c9
commit b7db9cdd8e
2 changed files with 23 additions and 2 deletions

View File

@ -53,6 +53,7 @@ defmodule Outlook.InternalTree.RawInternalBasic do
defp inline_to_translation_units(inline_tree) do
partition_inlinelevel(inline_tree)
|> chunk_with_list()
|> Enum.map(fn sentence -> strip_empty_nodes(sentence) end)
|> Enum.map(fn sentence -> Html.strip_attributes(sentence) end)
|> Enum.map(fn sentence ->
%TranslationUnit{
@ -90,6 +91,25 @@ defmodule Outlook.InternalTree.RawInternalBasic do
def partition_inlinelevel([]), do: []
@doc """
Recursively removes empty nodes from a list of nodes.

  * A text node (`type: :text`) whose `content` is the empty string is dropped.
  * An element node (`type: :element`) has its `content` stripped recursively;
    if nothing remains, the element itself is dropped, otherwise it is kept
    with the stripped content.
  * Any other node is kept unchanged.

The relative order of the surviving nodes is preserved.
"""
def strip_empty_nodes([]), do: []

def strip_empty_nodes([head | tail]) do
  case head do
    %{type: :element} = element ->
      # NOTE(review): an element whose content is already empty is removed as
      # well; if void elements (e.g. <br>) are stored with `content: []` they
      # would be dropped here too — confirm this is intended.
      case strip_empty_nodes(element.content) do
        [] ->
          strip_empty_nodes(tail)

        children ->
          # The struct update asserts that element nodes are InternalNode structs.
          [%InternalNode{element | content: children} | strip_empty_nodes(tail)]
      end

    %{type: :text, content: ""} ->
      strip_empty_nodes(tail)

    other ->
      [other | strip_empty_nodes(tail)]
  end
end
def strip_empty_tunits([ %TranslationUnit{content: ""} | rest]) do
strip_empty_tunits(rest)
end

View File

@ -153,7 +153,8 @@ defmodule Outlook.InternalTreeTest do
%Outlook.InternalTree.InternalNode{
name: "a",
attributes: %{
href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/"
href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/",
bullshit: "bollocks"
},
type: :element,
nid: "qxCrs0csHDLI",
@ -197,7 +198,7 @@ defmodule Outlook.InternalTreeTest do
%Outlook.InternalTree.TranslationUnit{
status: :untranslated,
nid: "xxxxxx",
content: "<a href=\"https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/\"></a> In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
content: " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ",
eph: %{}
}
],