From 6ec259a0e71174651bae95d4628138bf6fd68742 Mon Sep 17 00:00:00 2001 From: kj_sh604 Date: Sun, 15 Mar 2026 16:19:35 -0400 Subject: refactor: packages/ --- packages/excalidraw/element/textWrapping.test.ts | 633 +++++++++++++++++++++++ 1 file changed, 633 insertions(+) create mode 100644 packages/excalidraw/element/textWrapping.test.ts (limited to 'packages/excalidraw/element/textWrapping.test.ts') diff --git a/packages/excalidraw/element/textWrapping.test.ts b/packages/excalidraw/element/textWrapping.test.ts new file mode 100644 index 0000000..6c7bcb8 --- /dev/null +++ b/packages/excalidraw/element/textWrapping.test.ts @@ -0,0 +1,633 @@ +import { wrapText, parseTokens } from "./textWrapping"; +import type { FontString } from "./types"; + +describe("Test wrapText", () => { + // font is irrelevant as jsdom does not support FontFace API + // `measureText` width is mocked to return `text.length` by `jest-canvas-mock` + // https://github.com/hustcc/jest-canvas-mock/blob/master/src/classes/TextMetrics.js + const font = "10px Cascadia, Segoe UI Emoji" as FontString; + + it("should wrap the text correctly when word length is exactly equal to max width", () => { + const text = "Hello Excalidraw"; + // Length of "Excalidraw" is 100 and exacty equal to max width + const res = wrapText(text, font, 100); + expect(res).toEqual(`Hello\nExcalidraw`); + }); + + it("should return the text as is if max width is invalid", () => { + const text = "Hello Excalidraw"; + expect(wrapText(text, font, NaN)).toEqual(text); + expect(wrapText(text, font, -1)).toEqual(text); + expect(wrapText(text, font, Infinity)).toEqual(text); + }); + + it("should show the text correctly when max width reached", () => { + const text = "HelloπŸ˜€"; + const maxWidth = 10; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("H\ne\nl\nl\no\nπŸ˜€"); + }); + + it("should not wrap number when wrapping line", () => { + const text = "don't wrap this number 99,100.99"; + const maxWidth = 300; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("don't wrap this number\n99,100.99"); + }); + + it("should trim all trailing whitespaces", () => { + const text = "Hello "; + const maxWidth = 50; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello"); + }); + + it("should trim all but one trailing whitespaces", () => { + const text = "Hello "; + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello "); + }); + + it("should keep preceding whitespaces and trim all trailing whitespaces", () => { + const text = " Hello World"; + const maxWidth = 90; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" Hello\nWorld"); + }); + + it("should keep some preceding whitespaces, trim trailing whitespaces, but kep those that fit in the trailing line", () => { + const text = " Hello World "; + const maxWidth = 90; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" Hello\nWorld "); + }); + + it("should trim keep those whitespace that fit in the trailing line", () => { + const text = "Hello Wo rl d "; + const maxWidth = 100; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello Wo\nrl d "); + }); + + it("should support multiple (multi-codepoint) emojis", () => { + const text = "πŸ˜€πŸ—ΊπŸ”₯πŸ‘©πŸ½β€πŸ¦°πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦πŸ‡¨πŸ‡Ώ"; + const maxWidth = 1; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("πŸ˜€\nπŸ—Ί\nπŸ”₯\nπŸ‘©πŸ½β€πŸ¦°\nπŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦\nπŸ‡¨πŸ‡Ώ"); + }); + + it("should wrap the text correctly when text contains hyphen", () => { + let text = + "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; + const res = wrapText(text, font, 110); + expect(res).toBe( + `Wikipedia\nis hosted\nby\nWikimedia-\nFoundation,\na non-\nprofit\norganizatio\nn that also\nhosts a\nrange-of\nother\nprojects`, + ); + + text = "Hello thereusing-now"; + expect(wrapText(text, font, 100)).toEqual("Hello\nthereusing\n-now"); + }); + + it("should support wrapping nested lists", () => { + const text = `\tA) one tab\t\t- two tabs - 8 spaces`; + + const maxWidth = 100; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(`\tA) one\ntab\t\t- two\ntabs\n- 8 spaces`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`\tA)\none\ntab\n- two\ntabs\n- 8\nspace\ns`); + }); + + describe("When text is CJK", () => { + it("should break each CJK character when width is very small", () => { + // "μ•ˆλ…•ν•˜μ„Έμš”" (Hangul) + "γ“γ‚“γ«γ‘γ―δΈ–η•Œ" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "δ½ ε₯½" (Han) = "Hello Hello World Hello Hi" + const text = "μ•ˆλ…•ν•˜μ„Έμš”γ“γ‚“γ«γ‘γ―δΈ–η•Œο½ΊοΎοΎ†οΎγƒδ½ ε₯½"; + const maxWidth = 10; + const res = wrapText(text, font, maxWidth); + expect(res).toBe( + "μ•ˆ\nλ…•\nν•˜\nμ„Έ\nμš”\nこ\nγ‚“\nに\nけ\nは\nδΈ–\nη•Œ\nο½Ί\nン\nοΎ†\nチ\nハ\nδ½ \nε₯½", + ); + }); + + it("should break CJK text into longer segments when width is larger", () => { + // "μ•ˆλ…•ν•˜μ„Έμš”" (Hangul) + "γ“γ‚“γ«γ‘γ―δΈ–η•Œ" (Hiragana, Kanji) + "コンニチハ" (Katakana) + "δ½ ε₯½" (Han) = "Hello Hello World Hello Hi" + const text = "μ•ˆλ…•ν•˜μ„Έμš”γ“γ‚“γ«γ‘γ―δΈ–η•Œο½ΊοΎοΎ†οΎγƒδ½ ε₯½"; + const maxWidth = 30; + const res = wrapText(text, font, maxWidth); + + // measureText is mocked, so it's not precisely what would happen in prod + expect(res).toBe("μ•ˆλ…•ν•˜\nμ„Έμš”γ“\nんにけ\nγ―δΈ–η•Œ\nコンニ\nチハ你\nε₯½"); + }); + + it("should handle a combination of CJK, latin, emojis and whitespaces", () => { + const text = `a醫 醫 bb δ½ ε₯½ world-i-πŸ˜€πŸ—ΊπŸ”₯`; + + const maxWidth = 150; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(`a醫 醫 bb δ½ \nε₯½ world-i-πŸ˜€πŸ—Ί\nπŸ”₯`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`a醫 醫\nbb δ½ \nε₯½\nworld\n-i-πŸ˜€\nπŸ—ΊπŸ”₯`); + + const maxWidth3 = 30; + const res3 = wrapText(text, font, maxWidth3); + expect(res3).toBe(`a醫\n醫\nbb\nδ½ ε₯½\nwor\nld-\ni-\nπŸ˜€\nπŸ—Ί\nπŸ”₯`); + }); + + it("should break before and after a regular CJK character", () => { + const text = "HelloたWorld"; + const maxWidth1 = 50; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe("Hello\nた\nWorld"); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe("Helloた\nWorld"); + }); + + it("should break before and after certain CJK symbols", () => { + const text = "γ“γ‚“γ«γ‘γ―γ€ƒδΈ–η•Œ"; + const maxWidth1 = 50; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe("こんにけは\nγ€ƒδΈ–η•Œ"); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe("こんにけは〃\nδΈ–η•Œ"); + }); + + it("should break after, not before for certain CJK pairs", () => { + const text = "Hello γŸγ€‚"; + const maxWidth = 70; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello\nγŸγ€‚"); + }); + + it("should break before, not after for certain CJK pairs", () => { + const text = "Helloγ€ŒγŸWorld」"; + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hello\nγ€ŒγŸ\nWorld」"); + }); + + it("should break after, not before for certain CJK character pairs", () => { + const text = "γ€ŒHelloγŸγ€World"; + const maxWidth = 70; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("γ€ŒHello\nγŸγ€World"); + }); + + it("should break Chinese sentences", () => { + const text = `δΈ­ε›½δ½ ε₯½οΌθΏ™ζ˜―δΈ€δΈͺ桋试。 +ζˆ‘δ»¬ζ₯ηœ‹ηœ‹οΌšδΊΊζ°‘εΈΒ₯1234γ€ŒεΎˆθ΄΅γ€ +οΌˆζ‹¬ε·οΌ‰γ€ι€—ε·οΌŒε₯号。空格 ζ’θ‘Œγ€€ε…¨θ§’η¬¦ε·β€¦β€”`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`δΈ­ε›½δ½ ε₯½οΌθΏ™ζ˜―δΈ€\nδΈͺ桋试。 +ζˆ‘δ»¬ζ₯ηœ‹ηœ‹οΌšδΊΊζ°‘\n币Β₯1234γ€ŒεΎˆ\n贡」 +οΌˆζ‹¬ε·οΌ‰γ€ι€—ε·οΌŒ\nε₯号。空格 捒葌\n全角符号…—`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`δΈ­ε›½δ½ ε₯½οΌ\nθΏ™ζ˜―δΈ€δΈͺζ΅‹\n试。 +ζˆ‘δ»¬ζ₯ηœ‹\nηœ‹οΌšδΊΊζ°‘εΈ\nΒ₯1234\nγ€ŒεΎˆθ΄΅γ€ +οΌˆζ‹¬ε·οΌ‰γ€\nι€—ε·οΌŒε₯\n号。空格\nζ’θ‘Œγ€€ε…¨θ§’\n符号…—`); + }); + + it("should break Japanese sentences", () => { + const text = `ζ—₯ζœ¬γ“γ‚“γ«γ‘γ―οΌγ“γ‚Œγ―γƒ†γ‚Ήγƒˆγ§γ™γ€‚ + θ¦‹γ¦γΏγΎγ—γ‚‡γ†οΌšε††οΏ₯1234γ€Œι«˜γ„γ€ + οΌˆζ‹¬εΌ§οΌ‰γ€θͺ­η‚Ήγ€ε₯点。 + η©Ίη™½ ζ”Ήθ‘Œγ€€ε…¨θ§’θ¨˜ε·β€¦γƒΌ`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`ζ—₯ζœ¬γ“γ‚“γ«γ‘γ―οΌ\nγ“γ‚Œγ―γƒ†γ‚Ήγƒˆγ§\nす。 + 見てみましょ\nγ†οΌšε††οΏ₯1234\nγ€Œι«˜γ„γ€ + οΌˆζ‹¬εΌ§οΌ‰γ€θͺ­\n点、ε₯点。 + η©Ίη™½ ζ”Ήθ‘Œ\nε…¨θ§’θ¨˜ε·β€¦γƒΌ`); + + const maxWidth2 = 50; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`ζ—₯ζœ¬γ“γ‚“γ«\nγ‘γ―οΌγ“γ‚Œ\nγ―γƒ†γ‚Ήγƒˆγ§\nす。 + 見てみ\nγΎγ—γ‚‡γ†οΌš\n円\nοΏ₯1234\nγ€Œι«˜γ„γ€ + οΌˆζ‹¬\n弧)、θͺ­\n点、ε₯点。 + η©Ίη™½\nζ”Ήθ‘Œγ€€ε…¨θ§’\nθ¨˜ε·β€¦γƒΌ`); + }); + + it("should break Korean sentences", () => { + const text = `ν•œκ΅­ μ•ˆλ…•ν•˜μ„Έμš”! 이것은 ν…ŒμŠ€νŠΈμž…λ‹ˆλ‹€. +우리 보자: 원화₩1234γ€ŒλΉ„μ‹Έλ‹€γ€ +(κ΄„ν˜Έ), μ‰Όν‘œ, λ§ˆμΉ¨ν‘œ. +곡백 μ€„λ°”κΏˆγ€€μ „κ°κΈ°ν˜Έβ€¦β€”`; + + const maxWidth1 = 80; + const res1 = wrapText(text, font, maxWidth1); + expect(res1).toBe(`ν•œκ΅­ μ•ˆλ…•ν•˜μ„Έ\nμš”! 이것은 ν…Œ\nμŠ€νŠΈμž…λ‹ˆλ‹€. +우리 보자: 원\nν™”β‚©1234γ€ŒλΉ„\n싸닀」 +(κ΄„ν˜Έ), μ‰Ό\nν‘œ, λ§ˆμΉ¨ν‘œ. +곡백 μ€„λ°”κΏˆγ€€μ „\nκ°κΈ°ν˜Έβ€¦β€”`); + + const maxWidth2 = 60; + const res2 = wrapText(text, font, maxWidth2); + expect(res2).toBe(`ν•œκ΅­ μ•ˆλ…•ν•˜\nμ„Έμš”! 이것\n은 ν…ŒμŠ€νŠΈμž…\nλ‹ˆλ‹€. +우리 보자:\n원화\nβ‚©1234\nγ€ŒλΉ„μ‹Έλ‹€γ€ +(κ΄„ν˜Έ),\nμ‰Όν‘œ, 마침\nν‘œ. +곡백 μ€„λ°”κΏˆ\nμ „κ°κΈ°ν˜Έβ€¦β€”`); + }); + }); + + describe("When text contains leading whitespaces", () => { + const text = " \t Hello world"; + + it("should preserve leading whitespaces", () => { + const maxWidth = 120; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(" \t Hello\nworld"); + }); + + it("should break and collapse leading whitespaces when line breaks", () => { + const maxWidth = 60; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("\nHello\nworld"); + }); + + it("should break and collapse leading whitespaces whe words break", () => { + const maxWidth = 30; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("\nHel\nlo\nwor\nld"); + }); + }); + + describe("When text contains trailing whitespaces", () => { + it("shouldn't add new lines for trailing spaces", () => { + const text = "Hello whats up "; + const maxWidth = 190; + const res = wrapText(text, font, maxWidth); + expect(res).toBe(text); + }); + + it("should ignore trailing whitespaces when line breaks", () => { + const text = "Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 400; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrosesquippedaliophobia\n??????"); + }); + + it("should not ignore trailing whitespaces when word breaks", () => { + const text = "Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 300; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrosesquippedalio\nphobia ??????"); + }); + + it("should ignore trailing whitespaces when word breaks and line breaks", () => { + const text = "Hippopotomonstrosesquippedaliophobia ??????"; + const maxWidth = 180; + const res = wrapText(text, font, maxWidth); + expect(res).toBe("Hippopotomonstrose\nsquippedaliophobia\n??????"); + }); + }); + + describe("When text doesn't contain new lines", () => { + const text = "Hello whats up"; + + [ + { + desc: "break all words when width of each word is less than container width", + width: 70, + res: `Hello\nwhats\nup`, + }, + { + desc: "break all characters when width of each character is less than container width", + width: 15, + res: `H\ne\nl\nl\no\nw\nh\na\nt\ns\nu\np`, + }, + { + desc: "break words as per the width", + + width: 130, + res: `Hello whats\nup`, + }, + { + desc: "fit the container", + + width: 240, + res: "Hello whats up", + }, + { + desc: "push the word if its equal to max width", + width: 50, + res: `Hello\nwhats\nup`, + }, + ].forEach((data) => { + it(`should ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("When text contain new lines", () => { + const text = `Hello\n whats up`; + [ + { + desc: "break all words when width of each word is less than container width", + width: 70, + res: `Hello\n whats\nup`, + }, + { + desc: "break all characters when width of each character is less than container width", + width: 15, + res: `H\ne\nl\nl\no\n\nw\nh\na\nt\ns\nu\np`, + }, + { + desc: "break words as per the width", + width: 140, + res: `Hello\n whats up`, + }, + ].forEach((data) => { + it(`should respect new lines and ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("When text is long", () => { + const text = `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg break it now`; + [ + { + desc: "fit characters of long string as per container width", + width: 160, + res: `hellolongtextthi\nsiswhatsupwithyo\nuIamtypingggggan\ndtypinggg break\nit now`, + }, + { + desc: "fit characters of long string as per container width and break words as per the width", + + width: 120, + res: `hellolongtex\ntthisiswhats\nupwithyouIam\ntypingggggan\ndtypinggg\nbreak it now`, + }, + { + desc: "fit the long text when container width is greater than text length and move the rest to next line", + + width: 590, + res: `hellolongtextthisiswhatsupwithyouIamtypingggggandtypinggg\nbreak it now`, + }, + ].forEach((data) => { + it(`should ${data.desc}`, () => { + const res = wrapText(text, font, data.width); + expect(res).toEqual(data.res); + }); + }); + }); + + describe("Test parseTokens", () => { + it("should tokenize latin", () => { + let text = "Excalidraw is a virtual collaborative whiteboard"; + + expect(parseTokens(text)).toEqual([ + "Excalidraw", + " ", + "is", + " ", + "a", + " ", + "virtual", + " ", + "collaborative", + " ", + "whiteboard", + ]); + + text = + "Wikipedia is hosted by Wikimedia- Foundation, a non-profit organization that also hosts a range-of other projects"; + expect(parseTokens(text)).toEqual([ + "Wikipedia", + " ", + "is", + " ", + "hosted", + " ", + "by", + " ", + "Wikimedia-", + " ", + "Foundation,", + " ", + "a", + " ", + "non-", + "profit", + " ", + "organization", + " ", + "that", + " ", + "also", + " ", + "hosts", + " ", + "a", + " ", + "range-", + "of", + " ", + "other", + " ", + "projects", + ]); + }); + + it("should not tokenize number", () => { + const text = "99,100.99"; + const tokens = parseTokens(text); + expect(tokens).toEqual(["99,100.99"]); + }); + + it("should tokenize joined emojis", () => { + const text = `πŸ˜¬πŸŒπŸ—ΊπŸ”₯β˜‚οΈπŸ‘©πŸ½β€πŸ¦°πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦πŸ‘©πŸΎβ€πŸ”¬πŸ³οΈβ€πŸŒˆπŸ§”β€β™€οΈπŸ§‘β€πŸ€β€πŸ§‘πŸ™…πŸ½β€β™‚οΈβœ…0οΈβƒ£πŸ‡¨πŸ‡ΏπŸ¦…`; + const tokens = parseTokens(text); + + expect(tokens).toEqual([ + "😬", + "🌍", + "πŸ—Ί", + "πŸ”₯", + "β˜‚οΈ", + "πŸ‘©πŸ½β€πŸ¦°", + "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦", + "πŸ‘©πŸΎβ€πŸ”¬", + "πŸ³οΈβ€πŸŒˆ", + "πŸ§”β€β™€οΈ", + "πŸ§‘β€πŸ€β€πŸ§‘", + "πŸ™…πŸ½β€β™‚οΈ", + "βœ…", + "0️⃣", + "πŸ‡¨πŸ‡Ώ", + "πŸ¦…", + ]); + }); + + it("should tokenize emojis mixed with mixed text", () => { + const text = `😬a🌍bπŸ—ΊcπŸ”₯dβ˜‚οΈγ€ŠπŸ‘©πŸ½β€πŸ¦°γ€‹πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦εΎ·πŸ‘©πŸΎβ€πŸ”¬γ“πŸ³οΈβ€πŸŒˆμ•ˆπŸ§”β€β™€οΈgπŸ§‘β€πŸ€β€πŸ§‘hπŸ™…πŸ½β€β™‚οΈeβœ…f0️⃣gπŸ‡¨πŸ‡Ώ10πŸ¦…#hash`; + const tokens = parseTokens(text); + + expect(tokens).toEqual([ + "😬", + "a", + "🌍", + "b", + "πŸ—Ί", + "c", + "πŸ”₯", + "d", + "β˜‚οΈ", + "γ€Š", + "πŸ‘©πŸ½β€πŸ¦°", + "》", + "πŸ‘¨β€πŸ‘©β€πŸ‘§β€πŸ‘¦", + "εΎ·", + "πŸ‘©πŸΎβ€πŸ”¬", + "こ", + "πŸ³οΈβ€πŸŒˆ", + "μ•ˆ", + "πŸ§”β€β™€οΈ", + "g", + "πŸ§‘β€πŸ€β€πŸ§‘", + "h", + "πŸ™…πŸ½β€β™‚οΈ", + "e", + "βœ…", + "f0️⃣g", // bummer, but ok, as we traded kecaps not breaking (less common) for hash and numbers not breaking (more common) + "πŸ‡¨πŸ‡Ώ", + "10", // nice! do not break the number, as it's by default matched by \p{Emoji} + "πŸ¦…", + "#hash", // nice! do not break the hash, as it's by default matched by \p{Emoji} + ]); + }); + + it("should tokenize decomposed chars into their composed variants", () => { + // each input character is in a decomposed form + const text = "cΜŒγ¦γ‚™aΜˆγ²γ‚šΞ΅Μα„ƒα…‘ΠΈΜ†α„’α…‘α†«"; + expect(text.normalize("NFC").length).toEqual(8); + expect(text).toEqual(text.normalize("NFD")); + + const tokens = parseTokens(text); + expect(tokens.length).toEqual(8); + expect(tokens).toEqual(["č", "で", "Γ€", "ぴ", "Ξ­", "λ‹€", "ΠΉ", "ν•œ"]); + }); + + it("should tokenize artificial CJK", () => { + const text = `γ€Šι“εΎ·ηΆ“γ€‹ι†«-ι†«γ“γ‚“γ«γ‘γ―δΈ–η•ŒοΌμ•ˆλ…•ν•˜μ„Έμš”μ„Έκ³„οΌ›μš”γ€,λ‹€.λ‹€...원/달(((λ‹€)))[[1]]γ€š({((ν•œ))>)γ€›(γ€ŒγŸγ€)γŸβ€¦[Hello] \tγ€€WorldοΌŸγƒ‹γƒ₯ーヨーク・οΏ₯3700.55す。090-1234-5678οΏ₯1,000γ€œοΌ„5,000γ€Œη΄ ζ™΄γ‚‰γ—γ„οΌγ€γ€”ι‡θ¦γ€•οΌƒοΌ‘οΌšTaro君30οΌ…γ―γ€οΌˆγŸγͺγ°γŸοΌ‰γ€°οΏ₯110Β±οΏ₯570で20β„ƒγ€œ9:30γ€œ10:00【一η•ͺ】`; + // [ + // 'γ€Šι“', 'εΎ·', '碓》', '醫-', + // '醫', 'こ', 'γ‚“', 'に', + // 'け', 'は', 'δΈ–', 'η•ŒοΌ', + // 'μ•ˆ', 'λ…•', 'ν•˜', 'μ„Έ', + // 'μš”', 'μ„Έ', '계;', 'μš”γ€,', + // 'λ‹€.', 'λ‹€...', '원/', '달', + // '(((λ‹€)))', '[[1]]', 'γ€š({((ν•œ))>)γ€›', '(γ€ŒγŸγ€)', + // 'γŸβ€¦', '[Hello]', ' ', '\t', + // 'γ€€', 'World?', 'ニ', 'γƒ₯', + // 'γƒΌ', 'ヨ', 'γƒΌ', 'ク・', + // 'οΏ₯3700.55', 'す。', '090-', '1234-', + // '5678', 'οΏ₯1,000γ€œ', 'οΌ„5,000', 'γ€Œη΄ ', + // 'ζ™΄', 'ら', 'し', 'い!」', + // '〔重', '要〕', 'οΌƒ', 'οΌ‘οΌš', + // 'Taro', '君', '30οΌ…', 'は、', + // '(た', 'γͺ', 'ば', 'γŸοΌ‰', + // 'γ€°', 'οΏ₯110Β±', 'οΏ₯570', 'で', + // '20β„ƒγ€œ', '9:30γ€œ', '10:00', '【一', + // 'η•ͺ】' + // ] + const tokens = parseTokens(text); + + // Latin + expect(tokens).toContain("[[1]]"); + expect(tokens).toContain("[Hello]"); + expect(tokens).toContain("World?"); + expect(tokens).toContain("Taro"); + + // Chinese + expect(tokens).toContain("γ€Šι“"); + expect(tokens).toContain("εΎ·"); + expect(tokens).toContain("碓》"); + expect(tokens).toContain("醫-"); + expect(tokens).toContain("醫"); + + // Japanese + expect(tokens).toContain("こ"); + expect(tokens).toContain("γ‚“"); + expect(tokens).toContain("に"); + expect(tokens).toContain("け"); + expect(tokens).toContain("は"); + expect(tokens).toContain("δΈ–"); + expect(tokens).toContain("ク・"); + expect(tokens).toContain("η•ŒοΌ"); + expect(tokens).toContain("γŸβ€¦"); + expect(tokens).toContain("す。"); + expect(tokens).toContain("γƒ₯"); + expect(tokens).toContain("γ€Œη΄ "); + expect(tokens).toContain("ζ™΄"); + expect(tokens).toContain("ら"); + expect(tokens).toContain("し"); + expect(tokens).toContain("い!」"); + expect(tokens).toContain("君"); + expect(tokens).toContain("は、"); + expect(tokens).toContain("(た"); + expect(tokens).toContain("γͺ"); + expect(tokens).toContain("ば"); + expect(tokens).toContain("γŸοΌ‰"); + expect(tokens).toContain("で"); + expect(tokens).toContain("【一"); + expect(tokens).toContain("η•ͺ】"); + + // Check for Korean + expect(tokens).toContain("μ•ˆ"); + expect(tokens).toContain("λ…•"); + expect(tokens).toContain("ν•˜"); + expect(tokens).toContain("μ„Έ"); + expect(tokens).toContain("μš”"); + expect(tokens).toContain("μ„Έ"); + expect(tokens).toContain("계;"); + expect(tokens).toContain("μš”γ€,"); + expect(tokens).toContain("λ‹€."); + expect(tokens).toContain("λ‹€..."); + expect(tokens).toContain("원/"); + expect(tokens).toContain("달"); + expect(tokens).toContain("(((λ‹€)))"); + expect(tokens).toContain("γ€š({((ν•œ))>)γ€›"); + expect(tokens).toContain("(γ€ŒγŸγ€)"); + + // Numbers and units + expect(tokens).toContain("οΏ₯3700.55"); + expect(tokens).toContain("090-"); + expect(tokens).toContain("1234-"); + expect(tokens).toContain("5678"); + expect(tokens).toContain("οΏ₯1,000γ€œ"); + expect(tokens).toContain("οΌ„5,000"); + expect(tokens).toContain("οΌ‘οΌš"); + expect(tokens).toContain("30οΌ…"); + expect(tokens).toContain("οΏ₯110Β±"); + expect(tokens).toContain("20β„ƒγ€œ"); + expect(tokens).toContain("9:30γ€œ"); + expect(tokens).toContain("10:00"); + + // Punctuation and symbols + expect(tokens).toContain(" "); + expect(tokens).toContain("\t"); + expect(tokens).toContain("γ€€"); + expect(tokens).toContain("ニ"); + expect(tokens).toContain("γƒΌ"); + expect(tokens).toContain("ヨ"); + expect(tokens).toContain("γ€°"); + expect(tokens).toContain("οΌƒ"); + }); + }); +}); -- cgit v1.2.3