diff --git a/lib/outlook/internal_tree/raw_internal_basic.ex b/lib/outlook/internal_tree/raw_internal_basic.ex index 025ea72..5b98ad4 100644 --- a/lib/outlook/internal_tree/raw_internal_basic.ex +++ b/lib/outlook/internal_tree/raw_internal_basic.ex @@ -53,6 +53,7 @@ defmodule Outlook.InternalTree.RawInternalBasic do defp inline_to_translation_units(inline_tree) do partition_inlinelevel(inline_tree) |> chunk_with_list() + |> Enum.map(fn sentence -> strip_empty_nodes(sentence) end) |> Enum.map(fn sentence -> Html.strip_attributes(sentence) end) |> Enum.map(fn sentence -> %TranslationUnit{ @@ -90,6 +91,25 @@ defmodule Outlook.InternalTree.RawInternalBasic do def partition_inlinelevel([]), do: [] + def strip_empty_nodes([%{type: :element} = node | rest]) do + content = strip_empty_nodes(node.content) + case content do + [] -> strip_empty_nodes(rest) + _ -> [ %InternalNode{node | content: content} | strip_empty_nodes(rest) ] + end + end + + def strip_empty_nodes([%{type: :text, content: ""} | rest]) do + strip_empty_nodes(rest) + end + + def strip_empty_nodes([node | rest]) do + [ node | strip_empty_nodes(rest) ] + end + + def strip_empty_nodes([]), do: [] + + def strip_empty_tunits([ %TranslationUnit{content: ""} | rest]) do strip_empty_tunits(rest) end diff --git a/test/outlook/internaltree_test.exs b/test/outlook/internaltree_test.exs index a1ee710..67bf1d7 100644 --- a/test/outlook/internaltree_test.exs +++ b/test/outlook/internaltree_test.exs @@ -153,7 +153,8 @@ defmodule Outlook.InternalTreeTest do %Outlook.InternalTree.InternalNode{ name: "a", attributes: %{ - href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/" + href: "https://www.politico.eu/article/fit-for-55-eu-5-things-to-know/", + bullshit: "bollocks" }, type: :element, nid: "qxCrs0csHDLI", @@ -197,7 +198,7 @@ defmodule Outlook.InternalTreeTest do %Outlook.InternalTree.TranslationUnit{ status: :untranslated, nid: "xxxxxx", - content: " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ", + content: " In reality it will destroy the transport industry, steel, cement as well as coal and gas fuel electric generation. ", eph: %{} } ],