Ignore periods that do not end a sentence (initials, decimal points) when splitting text
This commit is contained in:
@ -10,10 +10,14 @@ defmodule Outlook.InternalTree.RawInternalBasic do
|
|||||||
alias Outlook.InternalTree.Html
|
alias Outlook.InternalTree.Html
|
||||||
|
|
||||||
@splitmarker "@@translationunit@@"
|
@splitmarker "@@translationunit@@"
|
||||||
|
@nonperiodmarker "@@nonperiod@@"
|
||||||
|
|
||||||
# Text-node clause: inserts @splitmarker after every sentence-ending
# punctuation mark (".", "?" or "!", optionally followed by a closing quote
# and whitespace) in the node's content, then recurses into `rest`.
#
# Periods that do not end a sentence are shielded from the splitter by
# temporarily swapping them for @nonperiodmarker:
#
#   * a period directly after a digit ("3.7", "1.2.3", "1.1.2020");
#   * a period after a stand-alone uppercase letter, i.e. an initial such as
#     the "F." in "F. William" or each letter of "U.S.".
#
# Known trade-off of the heuristic: a sentence that ends in a digit
# ("... in 1990.") is not split, because its period is shielded as well.
def set_split_markers([%InternalNode{type: :text} = textnode | rest]) do
  marked_content =
    textnode.content
    # The character after the period is deliberately NOT consumed, so in
    # "1.2.3" the "2" can still anchor the match for the second period.
    # The lookbehind keeps the final letter of an acronym ("USA.") from
    # being mistaken for an initial.
    |> String.replace(
      ~r/(\d|(?<![[:alnum:]])[[:upper:]])\./u,
      "\\1#{@nonperiodmarker}"
    )
    |> String.replace(~r|([.?!]["'”]?\s*)|u, "\\1#{@splitmarker}")
    # Put the shielded periods back once the split markers are in place.
    |> String.replace(@nonperiodmarker, ".")

  [%InternalNode{textnode | content: marked_content} | set_split_markers(rest)]
end
|
||||||
|
|
||||||
|
|||||||
@ -87,6 +87,48 @@ defmodule Outlook.InternalTreeTest do
|
|||||||
%TranslationUnit{status: :untranslated, uuid: @default_uuid,
|
%TranslationUnit{status: :untranslated, uuid: @default_uuid,
|
||||||
content: "<a href=\"dingsda.com\"><b>A</b> sentence</a> with many letters and many, many words. "}],
|
content: "<a href=\"dingsda.com\"><b>A</b> sentence</a> with many letters and many, many words. "}],
|
||||||
eph: %{sibling_with: :block}}]
|
eph: %{sibling_with: :block}}]
|
||||||
|
|
||||||
|
test "partition_text/1 doesn't split numbers and abbreviated names" do
  # The fixture text contains an initial ("F.") and a decimal number ("3.7");
  # neither period may be treated as a sentence boundary.
  text_node = %InternalNode{
    name: "",
    attributes: %{},
    type: :text,
    uuid: "d35ac56f-bf10-47b1-af19-152e6225bb32",
    content: "F. William Engdahl is 3.7 times more likely to write a good article than Mike Adams. But this doesn't mean anything bad about Mike.",
    eph: %{sibling_with: :inline}
  }

  paragraph = %InternalNode{
    name: "p",
    attributes: %{},
    type: :element,
    uuid: "0248aec7-c525-483d-a472-40a34488478d",
    content: [text_node],
    eph: %{sibling_with: :block}
  }

  # Exactly two units are expected: one per real sentence.
  expected_units =
    for sentence <- [
          "F. William Engdahl is 3.7 times more likely to write a good article than Mike Adams. ",
          "But this doesn't mean anything bad about Mike."
        ] do
      %TranslationUnit{status: :untranslated, uuid: @default_uuid, content: sentence}
    end

  partitioned =
    [paragraph]
    |> InternalTree.partition_text()
    |> unify_uuids_in_tunits()

  # The paragraph itself is unchanged apart from its content.
  assert partitioned == [%InternalNode{paragraph | content: expected_units}]
end
|
||||||
end
|
end
|
||||||
end
|
end
|
||||||
|
|||||||
Reference in New Issue
Block a user