From 3a2af2adb407406cc4b35e9a20accbec0800903e Mon Sep 17 00:00:00 2001 From: Thelonius Kort Date: Tue, 28 Mar 2023 14:48:26 +0200 Subject: [PATCH] Add TunitModifications with first modifier unite_with_next --- .iex-local.exs | 2 +- lib/outlook/internal_tree.ex | 6 +- .../internal_tree/tunit_modifications.ex | 83 +++++++++++++++++++ .../internaltree_tunitmodifications_test.exs | 83 +++++++++++++++++++ 4 files changed, 172 insertions(+), 2 deletions(-) create mode 100644 lib/outlook/internal_tree/tunit_modifications.ex create mode 100644 test/outlook/internaltree_tunitmodifications_test.exs diff --git a/.iex-local.exs b/.iex-local.exs index 24c171f..0b14c65 100644 --- a/.iex-local.exs +++ b/.iex-local.exs @@ -1,6 +1,6 @@ alias Outlook.HtmlPreparations alias Outlook.HtmlPreparations.HtmlPreparation -alias Outlook.InternalTree.{Html,InternalNode,TranslationUnit} +alias Outlook.InternalTree.{Html,InternalNode,TranslationUnit,TunitModifications} alias Outlook.InternalTree alias Outlook.Articles alias Outlook.Accounts diff --git a/lib/outlook/internal_tree.ex b/lib/outlook/internal_tree.ex index a226202..566bd12 100644 --- a/lib/outlook/internal_tree.ex +++ b/lib/outlook/internal_tree.ex @@ -1,6 +1,6 @@ defmodule Outlook.InternalTree do - alias Outlook.InternalTree.{Html,Modifiers,RawInternalBasic,InternalTree,Translation} + alias Outlook.InternalTree.{Html,Modifiers,RawInternalBasic,InternalTree,Translation,TunitModifications} alias Outlook.HtmlPreparations.HtmlPreparation alias Outlook.{Hyphenation, Translations} @@ -71,4 +71,8 @@ defmodule Outlook.InternalTree do InternalTree.collect_tunit_ids(tree) # |> List.flatten() end + + def modify_tunits(tree, modifier, tu_ids) do + TunitModifications.apply_modifier(tree, modifier, tu_ids) + end end diff --git a/lib/outlook/internal_tree/tunit_modifications.ex b/lib/outlook/internal_tree/tunit_modifications.ex new file mode 100644 index 0000000..8d382e8 --- /dev/null +++ b/lib/outlook/internal_tree/tunit_modifications.ex @@ -0,0 
+1,144 @@
+defmodule Outlook.InternalTree.TunitModifications do
+  @moduledoc """
+  Modifiers that restructure the translation units of an internal tree.
+
+  A modifier is selected by name from `modifiers/0` and run by
+  `apply_modifier/3` on the lists of translation units that contain the
+  requested ids.
+  """
+
+  alias Outlook.InternalTree.{InternalNode, TranslationUnit}
+
+  require Logger
+
+  @doc """
+  Returns the registry of available modifiers.
+
+  Each entry is a map with the keys `:name` (the string accepted by
+  `apply_modifier/3`), `:fn` (a function taking a list of sibling nodes and
+  the ids to process), `:label` and `:description`.
+  """
+  @spec modifiers() :: [map()]
+  def modifiers do
+    [
+      %{
+        name: "unite_with_next",
+        fn: &unite_with_next/2,
+        label: "Unite with next",
+        description: "unite translation unit with next"
+      },
+      %{
+        name: "split_tunit",
+        fn: &split_tunit/2,
+        label: "Split Translation unit",
+        description: "split translation unit into two"
+      }
+    ]
+  end
+
+  # Modifier functions
+
+  # Unites every unit named in `tu_ids` with its right-hand neighbour.
+  # Ids are handled last-to-first: each step only removes the node *after* the
+  # current one, so the ids still waiting to be processed stay in the list.
+  defp unite_with_next(nodelist, tu_ids) when is_list(tu_ids) do
+    # tracing output only, hence :debug rather than :info
+    Logger.debug("nu: #{inspect(tu_ids)}")
+
+    tu_ids
+    |> Enum.reverse()
+    |> Enum.reduce(nodelist, fn id, nodes -> unite_with_next(nodes, id) end)
+  end
+
+  defp unite_with_next(nodelist, tu_id) do
+    case Enum.find_index(nodelist, fn n -> n.nid == tu_id end) do
+      # an id that is not in this list is ignored rather than passed to Enum.slice/3
+      nil -> nodelist
+      ind -> unite_at(nodelist, ind)
+    end
+  end
+
+  # Merges the node at `ind` into one unit with its successor when both are
+  # translation units; in every other case the list is returned unchanged.
+  defp unite_at(nodelist, ind) do
+    case Enum.slice(nodelist, ind, 2) do
+      [%TranslationUnit{} = unit, %TranslationUnit{} = next] ->
+        united = %TranslationUnit{unit | content: unit.content <> next.content}
+
+        nodelist
+        |> List.replace_at(ind, united)
+        |> List.delete_at(ind + 1)
+
+      _ ->
+        nodelist
+    end
+  end
+
+  # Not implemented yet. Returns the nodes unchanged so that selecting this
+  # modifier cannot replace an element's content with `nil`.
+  defp split_tunit(nodelist, _tu_ids) do
+    Logger.warning("split_tunit is not implemented yet")
+    nodelist
+  end
+
+  # Function and helpers to apply modifiers
+
+  @doc """
+  Applies the modifier named `modifier` to the translation units in `tree`
+  whose `nid` is listed in `tu_ids` and returns the resulting tree.
+
+  Element nodes whose content starts with a translation unit have their
+  content handed to the modifier; other element nodes are descended into.
+  Nodes that are not elements are returned unchanged.
+
+  Raises `ArgumentError` if `modifier` is not listed in `modifiers/0` and the
+  tree contains at least one list of translation units.
+  """
+  @spec apply_modifier(list(), String.t(), list()) :: list()
+  def apply_modifier([%InternalNode{type: :element} = node | rest], modifier, tu_ids) do
+    content =
+      case node.content do
+        [%TranslationUnit{} | _] = tunits -> process_tunit_list(tunits, modifier, tu_ids)
+        children -> apply_modifier(children, modifier, tu_ids)
+      end
+
+    [%InternalNode{node | content: content} | apply_modifier(rest, modifier, tu_ids)]
+  end
+
+  # The pattern has to be `[node | rest]`: `[node, rest]` would match two-element
+  # lists only and recurse into a single node instead of the remaining list.
+  def apply_modifier([node | rest], modifier, tu_ids),
+    do: [node | apply_modifier(rest, modifier, tu_ids)]
+
+  def apply_modifier([], _modifier, _tu_ids), do: []
+
+  @doc """
+  Runs the modifier named `modifier` on `tunits` for those of `tu_ids` that
+  occur in `tunits`; returns `tunits` unchanged when none of them do.
+  """
+  def process_tunit_list(tunits, modifier, tu_ids) do
+    modi_fun = get_modi_fun(modifier)
+
+    case get_ids_to_process(tunits, tu_ids) do
+      [] -> tunits
+      ids_to_process -> modi_fun.(tunits, ids_to_process)
+    end
+  end
+
+  @doc false
+  def get_ids_to_process(tunits, tu_ids) do
+    wanted = MapSet.new(tu_ids)
+
+    # filtering the present ids returns them in the order they occur in tunits
+    tunits
+    |> Enum.map(fn u -> u.nid end)
+    |> Enum.filter(fn nid -> MapSet.member?(wanted, nid) end)
+  end
+
+  defp get_modi_fun(modifier) do
+    case Enum.find(modifiers(), fn m -> m.name == modifier end) do
+      %{fn: fun} -> fun
+      nil -> raise ArgumentError, "unknown tunit modifier: #{inspect(modifier)}"
+    end
+  end
+end
diff --git a/test/outlook/internaltree_tunitmodifications_test.exs b/test/outlook/internaltree_tunitmodifications_test.exs
new file mode 100644
index 0000000..e73076e
--- /dev/null
+++ b/test/outlook/internaltree_tunitmodifications_test.exs
@@ -0,0 +1,105 @@
+defmodule Outlook.InternalTree.TunitModificationsTest do
+  use Outlook.DataCase
+
+  # import Outlook.InternalTreeTestHelpers
+
+  describe "modify_tunits/3" do
+    alias Outlook.InternalTree
+    alias Outlook.InternalTree.{InternalNode,TranslationUnit}
+
+    def tree() do
+      [
+        %InternalNode{
+          name: "p",
+          attributes: %{},
+          type: :element,
+          nid: "rRIib2h8tyix",
+          content: [
+            %TranslationUnit{status: :untranslated, nid: "GuU9v6xeSS7e", content: "Joe Biden a.", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "bzCLsYGNe2PG", content: "k.", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "GyRUrzwH9LcP", content: "a. ", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "y2yb38U4hkya", content: "Crash Test Dummy.", eph: %{}}
+          ],
+          eph: %{sibling_with: :block}
+        }
+      ]
+    end
+
+    test "unite_with_next unites with next in simple case" do
+      assert InternalTree.modify_tunits(tree(), "unite_with_next", ["bzCLsYGNe2PG"]) == [
+        %InternalNode{
+          name: "p",
+          attributes: %{},
+          type: :element,
+          nid: "rRIib2h8tyix",
+          content: [
+            %TranslationUnit{status: :untranslated, nid: "GuU9v6xeSS7e", content: "Joe Biden a.", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "bzCLsYGNe2PG", content: "k.a. ", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "y2yb38U4hkya", content: "Crash Test Dummy.", eph: %{}}
+          ],
+          eph: %{sibling_with: :block}
+        }
+      ]
+    end
+
+    test "unite_with_next unites all with next in complex case" do
+      assert InternalTree.modify_tunits(tree(), "unite_with_next", ["GuU9v6xeSS7e","bzCLsYGNe2PG","GyRUrzwH9LcP"]) == [
+        %InternalNode{
+          name: "p",
+          attributes: %{},
+          type: :element,
+          nid: "rRIib2h8tyix",
+          content: [
+            %TranslationUnit{
+              status: :untranslated,
+              nid: "GuU9v6xeSS7e",
+              content: "Joe Biden a.k.a. Crash Test Dummy.",
+              eph: %{}
+            }
+          ],
+          eph: %{sibling_with: :block}
+        }
+      ]
+    end
+
+    test "unite_with_next ignores id if there is no next tunit" do
+      assert InternalTree.modify_tunits(tree(), "unite_with_next", ["y2yb38U4hkya"]) == [
+        %InternalNode{
+          name: "p",
+          attributes: %{},
+          type: :element,
+          nid: "rRIib2h8tyix",
+          content: [
+            %TranslationUnit{status: :untranslated, nid: "GuU9v6xeSS7e", content: "Joe Biden a.", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "bzCLsYGNe2PG", content: "k.", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "GyRUrzwH9LcP", content: "a. ", eph: %{}},
+            %TranslationUnit{status: :untranslated, nid: "y2yb38U4hkya", content: "Crash Test Dummy.", eph: %{}}
+          ],
+          eph: %{sibling_with: :block}
+        }
+      ]
+    end
+
+    test "non-element nodes are passed through unchanged" do
+      # any type other than :element takes the pass-through clause
+      comment = %InternalNode{
+        name: "",
+        attributes: %{},
+        type: :comment,
+        nid: "c0mmentN0de1",
+        content: "a comment",
+        eph: %{}
+      }
+
+      nodes = [comment] ++ tree() ++ [comment]
+
+      assert InternalTree.modify_tunits(nodes, "unite_with_next", ["y2yb38U4hkya"]) == nodes
+    end
+
+    test "an unknown modifier raises ArgumentError" do
+      assert_raise ArgumentError, fn ->
+        InternalTree.modify_tunits(tree(), "no_such_modifier", ["bzCLsYGNe2PG"])
+      end
+    end
+  end
+end