defmodule Outlook.InternalTree.RawInternalBasic do
  @moduledoc """
  Functions used for the raw_internal_tree, which is a transitory state after
  importing Html and before splitting text nodes into %TranslationUnit{}s.
  """

  alias Ecto.UUID
  alias Outlook.InternalTree.Html
  alias Outlook.InternalTree.InternalNode
  alias Outlook.InternalTree.TranslationUnit

  require Logger

  # Marker inserted into text content wherever a translation unit ends.
  @splitmarker "@@translationunit@@"
  # Temporary stand-in for periods that must not be treated as sentence ends.
  @nonperiodmarker "@@nonperiod@@"

  @doc """
  Inserts the split marker into the content of every text node in `nodes`.

  Element nodes are descended into recursively; nodes of any other type are
  returned unchanged.

  In text content the marker is appended after every run of `.`, `?` or `!`,
  including one optional closing quote (`"`, `'` or `”`) and any whitespace
  that follows. A period that directly follows an uppercase letter or a digit
  is not treated as a sentence end (e.g. initials such as `U.S.` or decimals
  such as `3.14`).
  """
  @spec set_split_markers(list()) :: list()
  def set_split_markers(nodes) when is_list(nodes) do
    Enum.map(nodes, &mark_node/1)
  end

  # Marks a single node; see set_split_markers/1.
  defp mark_node(%InternalNode{type: :text} = textnode) do
    %InternalNode{textnode | content: mark_sentence_ends(textnode.content)}
  end

  defp mark_node(%InternalNode{type: :element} = node) do
    %InternalNode{node | content: set_split_markers(node.content)}
  end

  defp mark_node(node), do: node

  # Appends @splitmarker after each sentence end found in `text`.
  defp mark_sentence_ends(text) do
    text
    # Shield periods following an uppercase letter or a digit so the next step
    # does not see them.
    |> String.replace(~r/([[:upper:]\d])\.(\d)?/u, "\\1#{@nonperiodmarker}\\2")
    # `[.?!]+` treats a run of terminators ("...", "?!") as ONE sentence end;
    # matching a single character would put a marker after each of them and
    # produce units consisting of a lone punctuation mark.
    |> String.replace(~r|([.?!]+["'”]?\s*)|u, "\\1#{@splitmarker}")
    # Restore the shielded periods.
    |> String.replace(@nonperiodmarker, ".")
  end

  @doc """
  Converts a raw tree (with split markers already set) into a tree whose inline
  content is replaced by %TranslationUnit{}s.

  Delegates to `partition_blocklevel/1`.
  """
  @spec partition_to_tunits(list()) :: list()
  def partition_to_tunits(raw_tree) do
    partition_blocklevel(raw_tree)
  end

  @doc """
  Walks the element nodes of `nodes` and rewrites their content according to
  `get_sibling_collocation/1`:

    * `:block`  - the content is partitioned recursively,
    * `:inline` - the content is converted to a list of %TranslationUnit{}s,
    * `nil`     - the content is left as it is.

  Any other collocation value raises a `CaseClauseError`. Non-element nodes are
  returned unchanged.
  """
  @spec partition_blocklevel(list()) :: list()
  def partition_blocklevel(nodes) when is_list(nodes) do
    Enum.map(nodes, &partition_block_node/1)
  end

  # Partitions a single node; see partition_blocklevel/1.
  defp partition_block_node(%InternalNode{type: :element} = node) do
    content =
      case get_sibling_collocation(node.content) do
        :block -> partition_blocklevel(node.content)
        :inline -> inline_to_translation_units(node.content)
        nil -> node.content
      end

    %InternalNode{node | content: content}
  end

  defp partition_block_node(node), do: node

  # Splits an inline tree at its split markers and wraps each resulting chunk
  # in an untranslated %TranslationUnit{} with a freshly generated UUID. The
  # chunks are passed through Html.strip_attributes/1 before being rendered
  # with Html.to_html/1.
  defp inline_to_translation_units(inline_tree) do
    Logger.info("inline_tree #{inspect(inline_tree)}")

    inline_tree
    |> partition_inlinelevel()
    |> chunk_with_list()
    |> Html.strip_attributes()
    |> Enum.map(fn sentence ->
      %TranslationUnit{
        content: Html.to_html(sentence),
        status: :untranslated,
        uuid: UUID.generate()
      }
    end)
  end

  @doc """
  Prepares an inline tree for `chunk_with_list/1`.

  Each node of `nodes` is mapped as follows:

    * a text node containing the split marker becomes a list of text nodes, one
      per non-empty piece between markers,
    * an element node becomes a list of copies of that element, one per chunk
      of its own partitioned content; an element whose content yields no chunk
      at all (e.g. it has no children) is kept unchanged,
    * every other node is returned unchanged.

  The result may therefore contain lists as elements; those lists are what
  `chunk_with_list/1` uses as chunk boundaries.
  """
  @spec partition_inlinelevel(list()) :: list()
  def partition_inlinelevel(nodes) when is_list(nodes) do
    Enum.map(nodes, &partition_inline_node/1)
  end

  # Partitions a single node; see partition_inlinelevel/1.
  defp partition_inline_node(%InternalNode{type: :element} = node) do
    chunks =
      node.content
      |> partition_inlinelevel()
      |> chunk_with_list()

    case chunks do
      # Mapping over no chunks would yield [], which the enclosing
      # chunk_with_list/1 flattens away - silently dropping the element.
      [] -> node
      _ -> Enum.map(chunks, fn nodelist -> %InternalNode{node | content: nodelist} end)
    end
  end

  defp partition_inline_node(%InternalNode{type: :text} = textnode) do
    if String.contains?(textnode.content, @splitmarker) do
      textnode.content
      |> String.split(@splitmarker, trim: true)
      |> Enum.map(fn piece -> %InternalNode{textnode | content: piece} end)
    else
      textnode
    end
  end

  defp partition_inline_node(node), do: node

  @doc """
  Flattens nested lists inside the content of every node in `nodes`.

  For each node whose `content` is a list, the content is flattened with
  `List.flatten/1` and then processed recursively. Other nodes are returned
  unchanged.
  """
  @spec flatten_element_contents(list()) :: list()
  def flatten_element_contents(nodes) when is_list(nodes) do
    Enum.map(nodes, &flatten_node/1)
  end

  # Flattens a single node; see flatten_element_contents/1.
  defp flatten_node(%{content: content} = node) when is_list(content) do
    %InternalNode{node | content: content |> List.flatten() |> flatten_element_contents()}
  end

  defp flatten_node(node), do: node

  @doc """
  Splits `list` into chunks, using its list-typed elements as boundaries.

  A chunk boundary is placed between every two neighbouring members of an
  element that is itself a list: the first member closes the current chunk,
  the last member opens the next one, and members in between form chunks of
  their own. Elements that are not lists never cause a boundary. `nil` and
  `false` elements are consumed as boundaries as well. Lists nested deeper
  than one level are flattened without adding further boundaries.

  ## Examples

      iex> chunk_with_list([1, 1, [2, 2], 3, 3, [4, 4, 4], 5, 5])
      [[1, 1, 2], [2, 3, 3, 4], [4], [4, 5, 5]]

      iex> chunk_with_list([1, 1, [1, 2], 2, 2, [2, 3, 4], 4, 4])
      [[1, 1, 1], [2, 2, 2, 2], [3], [4, 4, 4]]
  """
  @spec chunk_with_list(list()) :: [list()]
  def chunk_with_list(list) do
    # A falsy element is a boundary: emit the collected chunk and start anew.
    chunk_fun = fn el, acc ->
      if el do
        {:cont, [el | acc]}
      else
        {:cont, Enum.reverse(acc), []}
      end
    end

    # Emit whatever is left over, unless nothing was collected.
    after_fun = fn
      [] -> {:cont, []}
      acc -> {:cont, Enum.reverse(acc), []}
    end

    list
    # Put a `nil` boundary between the members of every list element.
    |> Enum.map(fn el -> if is_list(el), do: Enum.intersperse(el, nil), else: el end)
    |> List.flatten()
    |> Enum.chunk_while([], chunk_fun, after_fun)
  end

  @doc """
  Returns just either :block, :inline or nil. Assumes that it doesn't contain both.

  The value is the first `node.eph.sibling_with` found among the nodes of
  `content` once duplicates and `:both` have been removed; `nil` is returned
  when nothing is left (including for empty `content`).
  """
  @spec get_sibling_collocation(Enumerable.t()) :: term()
  def get_sibling_collocation(content) do
    content
    |> Enum.map(fn node -> node.eph.sibling_with end)
    |> Enum.uniq()
    |> List.delete(:both)
    |> List.first()
  end
end