Add :eph field to %TranslationUnit{} and remove :sibling_with
From now on, :eph is used to store ephemeral data such as :sibling_with. Additionally, :eph is now cleaned up before saving to the db, and InternalTree.Basic was renamed to InternalTree.RawInternalBasic to make clear that it contains functions for an intermediary tree structure.
This commit is contained in:
@ -1,70 +1,22 @@
|
||||
defmodule Outlook.InternalTree.Basic do
|
||||
|
||||
alias Ecto.UUID
|
||||
alias Outlook.InternalTree.InternalNode
|
||||
alias Outlook.InternalTree.TranslationUnit
|
||||
alias Outlook.InternalTree.Html
|
||||
|
||||
@splitmarker "@@translationunit@@"
|
||||
|
||||
def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do
|
||||
[ %InternalNode{textnode |
|
||||
content: String.replace(textnode.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
|
||||
} | set_split_markers(rest) ]
|
||||
@doc """
Resets the ephemeral `:eph` data of every node in a list of tree nodes.

This clause handles a `%TranslationUnit{}` at the head of the list: its `:eph`
map is emptied and the remaining siblings are cleaned recursively.
"""
def clean_eph([%TranslationUnit{} = unit | rest]) do
  # Recurse into `rest`: returning it untouched would leave every sibling that
  # follows the first translation unit with its ephemeral data intact.
  [%TranslationUnit{unit | eph: %{}} | clean_eph(rest)]
end
|
||||
|
||||
def set_split_markers([ %InternalNode{type: :element} = node | rest ]) do
|
||||
[ %InternalNode{node | content: set_split_markers(node.content)}
|
||||
| set_split_markers(rest) ]
|
||||
# Element node: empty its own `:eph` map, clean its children, then continue
# with the remaining siblings.
def clean_eph([%InternalNode{type: :element} = element | siblings]) do
  cleaned = %InternalNode{element | eph: %{}, content: clean_eph(element.content)}
  [cleaned | clean_eph(siblings)]
end
|
||||
|
||||
def set_split_markers([ node | rest ]) do
|
||||
[ node | set_split_markers(rest) ]
|
||||
# Any other node carrying a `:type` key: only its own `:eph` map is emptied,
# its content is left as it is.
def clean_eph([%{type: _} = other | siblings]) do
  cleaned = %InternalNode{other | eph: %{}}
  [cleaned | clean_eph(siblings)]
end
|
||||
|
||||
def set_split_markers([]), do: []
|
||||
|
||||
def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do
|
||||
[ %InternalNode{node | content: case get_sibling_collocation(node.content) do
|
||||
:block -> partition_textnodes(node.content)
|
||||
:inline -> inline_to_translation_units(node.content)
|
||||
_ -> [ node ]
|
||||
end
|
||||
} | partition_textnodes(rest) ]
|
||||
end
|
||||
|
||||
def partition_textnodes([ node | rest ]) do
|
||||
[ node | partition_textnodes(rest) ]
|
||||
end
|
||||
|
||||
def partition_textnodes([]), do: []
|
||||
|
||||
|
||||
defp inline_to_translation_units(contents) do
|
||||
contents
|
||||
# |> Html.strip_attributes # to be implemented
|
||||
|> Html.to_html()
|
||||
|> String.split(@splitmarker, trim: true)
|
||||
|> Enum.map(fn sentence ->
|
||||
%TranslationUnit{
|
||||
content: sentence,
|
||||
status: :untranslated,
|
||||
uuid: UUID.generate()
|
||||
}
|
||||
end
|
||||
)
|
||||
end
|
||||
|
||||
defp contains_elements?(content) do
|
||||
|
||||
end
|
||||
|
||||
@doc "Returns just either :block or :inline. Assumes that it doesn't contain both."
|
||||
def get_sibling_collocation(content) do
|
||||
content
|
||||
|> Enum.map(fn node -> node.sibling_with end)
|
||||
|> Enum.uniq()
|
||||
|> List.delete(:both)
|
||||
|> List.first
|
||||
end
|
||||
# Base case: an empty node list has nothing to clean.
def clean_eph([]), do: []
|
||||
end
|
||||
|
||||
@ -1,4 +1,4 @@
|
||||
defmodule Outlook.InternalTree.InternalNode do
  @moduledoc """
  A node of the internal tree.

  Fields:

    * `:name` - node name (defaults to `""`)
    * `:attributes` - map of attributes
    * `:type` - node type atom; the tree functions match on `:text` and `:element`
    * `:uuid` - identifier of the node
    * `:content` - a string for `:text` nodes, a list of child nodes for `:element` nodes
    * `:eph` - map of ephemeral data (e.g. `:sibling_with`); it replaces the former
      top-level `:sibling_with` field and is meant to be emptied before persisting
  """

  @derive Jason.Encoder
  # A module may call `defstruct` only once; the legacy definition carrying
  # `sibling_with: nil` is dropped in favour of the `:eph` map.
  defstruct name: "", attributes: %{}, type: :atom, uuid: "", content: [], eph: %{}
end
|
||||
|
||||
74
lib/outlook/internal_tree/raw_internal_basic.ex
Normal file
74
lib/outlook/internal_tree/raw_internal_basic.ex
Normal file
@ -0,0 +1,74 @@
|
||||
defmodule Outlook.InternalTree.RawInternalBasic do
|
||||
@moduledoc """
|
||||
Functions used for the raw_internal_tree, which is a transitory state after importing
|
||||
Html and before splitting textnodes into %TranslationUnit{}s.
|
||||
"""
|
||||
|
||||
alias Ecto.UUID
|
||||
alias Outlook.InternalTree.InternalNode
|
||||
alias Outlook.InternalTree.TranslationUnit
|
||||
alias Outlook.InternalTree.Html
|
||||
|
||||
@splitmarker "@@translationunit@@"
|
||||
|
||||
@doc """
Inserts the split marker after every sentence end found in the text nodes of a tree.

Takes a list of nodes and returns a list of the same shape. Text nodes get the
marker appended after each `.`, `?` or `!` (together with an optional closing
quote character and any trailing whitespace). Element nodes are processed
recursively through their content. All other nodes are returned unchanged.
"""
def set_split_markers(nodes) when is_list(nodes) do
  Enum.map(nodes, &mark_sentence_ends/1)
end

# Text node: append the marker behind each sentence-ending match.
defp mark_sentence_ends(%InternalNode{type: :text} = text) do
  marked = String.replace(text.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
  %InternalNode{text | content: marked}
end

# Element node: descend into the children.
defp mark_sentence_ends(%InternalNode{type: :element} = element) do
  %InternalNode{element | content: set_split_markers(element.content)}
end

# Anything else passes through untouched.
defp mark_sentence_ends(other), do: other
|
||||
|
||||
@doc """
Walks a list of nodes and turns inline content into `%TranslationUnit{}`s.

For every element node the collocation of its children decides what happens
(see `get_sibling_collocation/1`):

  * `:block` - the children are partitioned recursively
  * `:inline` - the children are serialized to HTML and split into translation units
  * anything else (e.g. no children) - the children are kept unchanged

Nodes that are not element nodes are returned as they are.
"""
def partition_textnodes([%InternalNode{type: :element} = node | rest]) do
  content =
    case get_sibling_collocation(node.content) do
      :block -> partition_textnodes(node.content)
      :inline -> inline_to_translation_units(node.content)
      # Keep the existing children. Using `[node]` here would nest a copy of
      # the node inside itself, e.g. for elements without any content.
      _ -> node.content
    end

  [%InternalNode{node | content: content} | partition_textnodes(rest)]
end

def partition_textnodes([node | rest]) do
  [node | partition_textnodes(rest)]
end

def partition_textnodes([]), do: []
|
||||
|
||||
|
||||
# Serializes the given nodes to HTML, cuts the result at every split marker and
# wraps each non-empty piece in an untranslated %TranslationUnit{} with a
# freshly generated UUID.
defp inline_to_translation_units(contents) do
  # Html.strip_attributes/1 is meant to run before serialization (to be implemented)
  html = Html.to_html(contents)

  for sentence <- String.split(html, @splitmarker, trim: true) do
    %TranslationUnit{
      content: sentence,
      status: :untranslated,
      uuid: UUID.generate()
    }
  end
end
|
||||
|
||||
# Returns true when at least one node in `content` is an element node,
# false otherwise (including for an empty list).
# NOTE(review): the original body was empty and always returned nil; the
# implementation follows the function name - confirm the intended semantics.
defp contains_elements?(content) do
  Enum.any?(content, &match?(%InternalNode{type: :element}, &1))
end
|
||||
|
||||
@doc """
Returns the collocation (`:block` or `:inline`) shared by the given sibling nodes.

Reads `eph.sibling_with` of every node, ignores `:both` and returns the first
remaining value. Returns `nil` when nothing remains, e.g. for an empty list.
Assumes that the siblings do not mix `:block` and `:inline`.
"""
def get_sibling_collocation(content) do
  content
  |> Enum.map(& &1.eph.sibling_with)
  |> Enum.find(fn collocation -> collocation != :both end)
end
|
||||
end
|
||||
Reference in New Issue
Block a user