diff --git a/lib/outlook/articles/internal_tree.ex b/lib/outlook/articles/internal_tree.ex index fedfce0..4938a22 100644 --- a/lib/outlook/articles/internal_tree.ex +++ b/lib/outlook/articles/internal_tree.ex @@ -3,6 +3,7 @@ defmodule Outlook.Articles.InternalTree do alias Outlook.InternalTree.InternalNode alias Outlook.InternalTree.TranslationUnit + alias Outlook.InternalTree.Basic def type, do: :string @@ -16,7 +17,10 @@ defmodule Outlook.Articles.InternalTree do {:ok, Jason.decode!(tree, keys: :atoms!) |> from_json} end - def dump(tree) when is_list(tree), do: {:ok, Jason.encode!(tree)} + def dump(tree) when is_list(tree) do + {:ok, Basic.clean_eph(tree) |> Jason.encode!()} + end + def dump(_), do: :error diff --git a/lib/outlook/html_preparations/html_preparation.ex b/lib/outlook/html_preparations/html_preparation.ex index d8013f4..1b78355 100644 --- a/lib/outlook/html_preparations/html_preparation.ex +++ b/lib/outlook/html_preparations/html_preparation.ex @@ -48,7 +48,7 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do def set_sibling_with([ %{type: :element} = node | rest ]) do [ %InternalNode{ node | - sibling_with: node.name in @block_elements && :block || :inline, + eph: %{sibling_with: node.name in @block_elements && :block || :inline}, content: set_sibling_with(node.content) } | set_sibling_with(rest) ] end @@ -58,11 +58,12 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do :text -> Regex.match?(~r/^\s*$/, node.content) && :both || :inline :comment -> :both end - [ %InternalNode{ node | sibling_with: sib_with } | set_sibling_with(rest) ] + [ %InternalNode{ node | eph: %{sibling_with: sib_with} } | set_sibling_with(rest) ] end def set_sibling_with([ ]), do: ( [ ] ) + def strip_whitespace_textnodes [ %{type: :text} = node | rest] do if Regex.match?(~r/^\s*$/, node.content) do strip_whitespace_textnodes(rest) @@ -71,7 +72,6 @@ defmodule Outlook.HtmlPreparations.HtmlPreparation do end end - def strip_whitespace_textnodes [ %{type: :element} = node | rest] do [ %InternalNode{ node | content: strip_whitespace_textnodes(node.content) } | strip_whitespace_textnodes(rest) ] diff --git a/lib/outlook/internal_tree.ex b/lib/outlook/internal_tree.ex index ac15018..6071d92 100644 --- a/lib/outlook/internal_tree.ex +++ b/lib/outlook/internal_tree.ex @@ -1,6 +1,6 @@ defmodule Outlook.InternalTree do - alias Outlook.InternalTree.{Html,Modifiers,Basic} + alias Outlook.InternalTree.{Html,Modifiers,RawInternalBasic} alias Outlook.HtmlPreparations.HtmlPreparation def render_html(tree) do @@ -11,7 +11,6 @@ defmodule Outlook.InternalTree do def render_html_preview(tree) do tree - |> partition_text |> Html.to_html_preview("1") end @@ -24,7 +23,7 @@ defmodule Outlook.InternalTree do def partition_text(tree) do # validate_sibling_collocation(tree) tree - |> Basic.set_split_markers() - |> Basic.partition_textnodes() + |> RawInternalBasic.set_split_markers() + |> RawInternalBasic.partition_textnodes() end end diff --git a/lib/outlook/internal_tree/basic.ex b/lib/outlook/internal_tree/basic.ex index b4f5964..cb5e50b 100644 --- a/lib/outlook/internal_tree/basic.ex +++ b/lib/outlook/internal_tree/basic.ex @@ -1,70 +1,22 @@ defmodule Outlook.InternalTree.Basic do - alias Ecto.UUID alias Outlook.InternalTree.InternalNode alias Outlook.InternalTree.TranslationUnit - alias Outlook.InternalTree.Html - @splitmarker "@@translationunit@@" - - def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do - [ %InternalNode{textnode | - content: String.replace(textnode.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}") - } | set_split_markers(rest) ] + def clean_eph([%TranslationUnit{} = node | rest]) do + [ node | rest ] end - def set_split_markers([ %InternalNode{type: :element} = node | rest ]) do - [ %InternalNode{node | content: set_split_markers(node.content)} - | set_split_markers(rest) ] + def clean_eph([%InternalNode{type: :element} = node | rest]) do + [ %InternalNode{node | + eph: %{}, + content: clean_eph(node.content)} + | clean_eph(rest) ] end - def set_split_markers([ node | rest ]) do - [ node | set_split_markers(rest) ] + def clean_eph([%{type: _} = node | rest]) do + [ %InternalNode{node | eph: %{}} | clean_eph(rest) ] end - def set_split_markers([]), do: [] - - def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do - [ %InternalNode{node | content: case get_sibling_collocation(node.content) do - :block -> partition_textnodes(node.content) - :inline -> inline_to_translation_units(node.content) - _ -> [ node ] - end - } | partition_textnodes(rest) ] - end - - def partition_textnodes([ node | rest ]) do - [ node | partition_textnodes(rest) ] - end - - def partition_textnodes([]), do: [] - - - defp inline_to_translation_units(contents) do - contents - # |> Html.strip_attributes # to be implemented - |> Html.to_html() - |> String.split(@splitmarker, trim: true) - |> Enum.map(fn sentence -> - %TranslationUnit{ - content: sentence, - status: :untranslated, - uuid: UUID.generate() - } - end - ) - end - - defp contains_elements?(content) do - - end - - @doc "Returns just either :block or :inline. Assumes that it doesn't contain both." - def get_sibling_collocation(content) do - content - |> Enum.map(fn node -> node.sibling_with end) - |> Enum.uniq() - |> List.delete(:both) - |> List.first - end + def clean_eph([]), do: [] end diff --git a/lib/outlook/internal_tree/internal_node.ex b/lib/outlook/internal_tree/internal_node.ex index 3a16c5e..489379a 100644 --- a/lib/outlook/internal_tree/internal_node.ex +++ b/lib/outlook/internal_tree/internal_node.ex @@ -1,4 +1,4 @@ defmodule Outlook.InternalTree.InternalNode do @derive Jason.Encoder - defstruct name: "", attributes: %{}, type: :atom, uuid: "", content: [], sibling_with: nil + defstruct name: "", attributes: %{}, type: :atom, uuid: "", content: [], eph: %{} end diff --git a/lib/outlook/internal_tree/raw_internal_basic.ex b/lib/outlook/internal_tree/raw_internal_basic.ex new file mode 100644 index 0000000..8d9fa52 --- /dev/null +++ b/lib/outlook/internal_tree/raw_internal_basic.ex @@ -0,0 +1,74 @@ +defmodule Outlook.InternalTree.RawInternalBasic do + @moduledoc """ + Function used for the raw_internal_tree which is a transitory state after importing + Html and before splitting textnodes into %TranslationUnit{}s. + """ + + alias Ecto.UUID + alias Outlook.InternalTree.InternalNode + alias Outlook.InternalTree.TranslationUnit + alias Outlook.InternalTree.Html + + @splitmarker "@@translationunit@@" + + def set_split_markers([ %InternalNode{type: :text} = textnode | rest ]) do + [ %InternalNode{textnode | + content: String.replace(textnode.content, ~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}") + } | set_split_markers(rest) ] + end + + def set_split_markers([ %InternalNode{type: :element} = node | rest ]) do + [ %InternalNode{node | content: set_split_markers(node.content)} + | set_split_markers(rest) ] + end + + def set_split_markers([ node | rest ]) do + [ node | set_split_markers(rest) ] + end + + def set_split_markers([]), do: [] + + def partition_textnodes([ %InternalNode{type: :element} = node | rest ]) do + [ %InternalNode{node | content: case get_sibling_collocation(node.content) do + :block -> partition_textnodes(node.content) + :inline -> inline_to_translation_units(node.content) + _ -> [ node ] + end + } | partition_textnodes(rest) ] + end + + def partition_textnodes([ node | rest ]) do + [ node | partition_textnodes(rest) ] + end + + def partition_textnodes([]), do: [] + + + defp inline_to_translation_units(contents) do + contents + # |> Html.strip_attributes # to be implemented + |> Html.to_html() + |> String.split(@splitmarker, trim: true) + |> Enum.map(fn sentence -> + %TranslationUnit{ + content: sentence, + status: :untranslated, + uuid: UUID.generate() + } + end + ) + end + + defp contains_elements?(content) do + + end + + @doc "Returns just either :block or :inline. Assumes that it doesn't contain both." + def get_sibling_collocation(content) do + content + |> Enum.map(fn node -> node.eph.sibling_with end) + |> Enum.uniq() + |> List.delete(:both) + |> List.first + end +end diff --git a/lib/outlook_web/live/article_live/new.ex b/lib/outlook_web/live/article_live/new.ex index f8ed083..9cf0f13 100644 --- a/lib/outlook_web/live/article_live/new.ex +++ b/lib/outlook_web/live/article_live/new.ex @@ -4,7 +4,7 @@ defmodule OutlookWeb.ArticleLive.New do import OutlookWeb.ArticleLive.NewComponents alias OutlookWeb.ArticleLive.FormComponent - alias Outlook.{Articles,Authors,HtmlPreparations} + alias Outlook.{Articles,Authors,HtmlPreparations,InternalTree} alias Articles.{Article,RawHtmlInput} require Logger @@ -52,6 +52,9 @@ defmodule OutlookWeb.ArticleLive.New do @impl true def handle_event("approve_raw_internaltree", _, socket) do + socket = socket + |> assign(:raw_internal_tree, + InternalTree.partition_text(socket.assigns.raw_internal_tree)) {:noreply, socket |> assign(:step, :review_translation_units)} end