Using :eph from now on to store ephemeral data like :sibling_with. Additionally added cleaning up :eph before saving to db. And renamed InternalTree.Basic to InternalTree.RawInternalBasic to make clear that it contains function for an intermediary tree structure.
75 lines
2.1 KiB
Elixir
75 lines
2.1 KiB
Elixir
defmodule Outlook.InternalTree.RawInternalBasic do
|
|
@moduledoc """
|
|
Functions used for the raw_internal_tree, which is a transitory state after importing
HTML and before splitting text nodes into %TranslationUnit{}s.
|
|
"""
|
|
|
|
alias Ecto.UUID
|
|
alias Outlook.InternalTree.InternalNode
|
|
alias Outlook.InternalTree.TranslationUnit
|
|
alias Outlook.InternalTree.Html
|
|
|
|
@splitmarker "@@translationunit@@"
|
|
|
|
@doc """
Inserts the split marker into the text of every text node in `nodes`.

The marker is appended after each `.`, `?` or `!` (optionally followed by a
closing quote character and any trailing whitespace). Element nodes are
processed recursively through their `content`; every other node is returned
unchanged.
"""
def set_split_markers(nodes) when is_list(nodes), do: Enum.map(nodes, &mark_node/1)

# Text node: append the marker after each sentence-ending punctuation run.
defp mark_node(%InternalNode{type: :text, content: text} = text_node) do
  marked = String.replace(text, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
  %{text_node | content: marked}
end

# Element node: descend into its children.
defp mark_node(%InternalNode{type: :element, content: children} = element) do
  %{element | content: set_split_markers(children)}
end

# Anything else is passed through untouched.
defp mark_node(other), do: other
|
|
|
|
@doc """
Walks `nodes` and rewrites the `content` of every element node.

For each element node the children are inspected with
`get_sibling_collocation/1`:

  * `:block`  - the children are processed recursively;
  * `:inline` - the children are turned into translation units by
    `inline_to_translation_units/1`;
  * anything else (for example `nil` for empty content) - the children are
    kept exactly as they are.

Nodes that are not element nodes are returned unchanged. Returns the new list
of nodes in the original order.
"""
def partition_textnodes([%InternalNode{type: :element} = node | rest]) do
  new_content =
    case get_sibling_collocation(node.content) do
      :block ->
        partition_textnodes(node.content)

      :inline ->
        inline_to_translation_units(node.content)

      # No usable collocation: leave the children alone. The previous fallback
      # (`[node]`) made the element its own child, duplicating it in the tree.
      _ ->
        node.content
    end

  [%{node | content: new_content} | partition_textnodes(rest)]
end

def partition_textnodes([node | rest]) do
  [node | partition_textnodes(rest)]
end

def partition_textnodes([]), do: []
|
|
|
|
|
|
# Renders `contents` with Html.to_html/1, cuts the resulting string at every
# split marker (empty pieces are dropped) and wraps each piece in an
# untranslated %TranslationUnit{} carrying a freshly generated UUID.
defp inline_to_translation_units(contents) do
  # Html.strip_attributes should run before rendering -- to be implemented.
  html = Html.to_html(contents)

  for sentence <- String.split(html, @splitmarker, trim: true) do
    %TranslationUnit{content: sentence, status: :untranslated, uuid: UUID.generate()}
  end
end
|
|
|
|
# Returns true when at least one node in `content` is an element node,
# false otherwise (including for an empty list).
# NOTE(review): the original body was empty and therefore always returned
# nil; the intended semantics are inferred from the name -- confirm.
defp contains_elements?(content) do
  Enum.any?(content, &match?(%InternalNode{type: :element}, &1))
end
|
|
|
|
@doc """
Returns the first `eph.sibling_with` value among `content` that is not `:both`.

Intended to yield either `:block` or `:inline`, assuming the nodes do not mix
the two. Returns `nil` when `content` is empty or every node reports `:both`.
"""
def get_sibling_collocation(content) do
  # Read the value from every node first, so a malformed node still raises
  # regardless of where it sits in the list.
  collocations = Enum.map(content, & &1.eph.sibling_with)

  Enum.find(collocations, fn collocation -> collocation !== :both end)
end
|
|
end
|