From bac259debf419694f745d5ff41584051d9b20ea3 Mon Sep 17 00:00:00 2001 From: Thomas Steiner Date: Fri, 11 Jul 2025 17:26:38 +0200 Subject: [PATCH] Restructure to use lande or LanguageDetector --- .../compose/components/compose_form.jsx | 46 +++-------------- .../compose/components/language_dropdown.tsx | 21 ++++++-- .../compose/util/language_detection.d.ts | 5 ++ .../compose/util/language_detection.js | 40 +++++++-------- ...anguage_detection_with_languagedetector.js | 50 +++++++++++++++++++ .../util/language_detection_with_laude.js | 38 ++++++++++++++ 6 files changed, 138 insertions(+), 62 deletions(-) create mode 100644 app/javascript/mastodon/features/compose/util/language_detection.d.ts create mode 100644 app/javascript/mastodon/features/compose/util/language_detection_with_languagedetector.js create mode 100644 app/javascript/mastodon/features/compose/util/language_detection_with_laude.js diff --git a/app/javascript/mastodon/features/compose/components/compose_form.jsx b/app/javascript/mastodon/features/compose/components/compose_form.jsx index 61c75211568..fcce6aec423 100644 --- a/app/javascript/mastodon/features/compose/components/compose_form.jsx +++ b/app/javascript/mastodon/features/compose/components/compose_form.jsx @@ -10,8 +10,6 @@ import ImmutablePureComponent from 'react-immutable-pure-component'; import { length } from 'stringz'; -import debounce from 'lodash.debounce'; - import { missingAltTextModal } from 'mastodon/initial_state'; import { changeComposeLanguage } from 'mastodon/actions/compose'; @@ -24,6 +22,7 @@ import PrivacyDropdownContainer from '../containers/privacy_dropdown_container'; import SpoilerButtonContainer from '../containers/spoiler_button_container'; import UploadButtonContainer from '../containers/upload_button_container'; import { countableText } from '../util/counter'; +import { debouncedGuess, countLetters } from '../util/language_detection'; import { CharacterCounter } from './character_counter'; import { EditIndicator } 
from './edit_indicator'; @@ -50,24 +49,6 @@ const mapStateToProps = (state) => ({ currentLanguage: state.meta.get('locale'), }); -const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis; -let supportsLanguageDetector = languageDetectorInGlobalThis && await globalThis.LanguageDetector.availability() === 'available'; -let languageDetector; -// If the API is supported, but the model not loaded yet… -if (languageDetectorInGlobalThis && !supportsLanguageDetector) { - // …trigger the model download - LanguageDetector.create().then((_languageDetector) => { - supportsLanguageDetector = true - languageDetector = _languageDetector - }) -} - -function countLetters(text) { - const segmenter = new Intl.Segmenter('und', { granularity: 'grapheme' }) - const letters = [...segmenter.segment(text)] - return letters.length -} - class ComposeForm extends ImmutablePureComponent { static propTypes = { intl: PropTypes.object.isRequired, @@ -112,7 +93,6 @@ class ComposeForm extends ImmutablePureComponent { constructor(props) { super(props); this.textareaRef = createRef(null); - this.debouncedHandleKeyUp = debounce(this._handleKeyUp.bind(this), 500); } handleChange = (e) => { @@ -125,30 +105,21 @@ class ComposeForm extends ImmutablePureComponent { } }; - handleKeyUp = (e) => { - this.debouncedHandleKeyUp(e); - } - - _handleKeyUp = async (e) => { - if (!supportsLanguageDetector) { - return; - } - if (!languageDetector) { - languageDetector = await globalThis.LanguageDetector.create(); - } + handleKeyUp = async (e) => { const text = this.getFulltextForCharacterCounting().trim(); const currentLanguage = this.props.currentLanguage; if (!text || countLetters(text) <= 5) { this.props.dispatch(changeComposeLanguage(currentLanguage)); return; } - try { - let detectedLanguage = (await languageDetector.detect(text))[0].detectedLanguage - detectedLanguage = detectedLanguage === 'und' ? 
currentLanguage : detectedLanguage.substring(0, 2); + let detectedLanguage = await debouncedGuess(text); + if (!detectedLanguage) { + this.props.dispatch(changeComposeLanguage(currentLanguage)); + return; + } this.props.dispatch(changeComposeLanguage(detectedLanguage)); - } - catch { + } catch { this.props.dispatch(changeComposeLanguage(currentLanguage)); } } @@ -218,7 +189,6 @@ class ComposeForm extends ImmutablePureComponent { componentWillUnmount () { if (this.timeout) clearTimeout(this.timeout); - this.debouncedHandleKeyUp.cancel(); } componentDidUpdate (prevProps) { diff --git a/app/javascript/mastodon/features/compose/components/language_dropdown.tsx b/app/javascript/mastodon/features/compose/components/language_dropdown.tsx index d11891308f5..2fe73f34250 100644 --- a/app/javascript/mastodon/features/compose/components/language_dropdown.tsx +++ b/app/javascript/mastodon/features/compose/components/language_dropdown.tsx @@ -20,7 +20,7 @@ import { languages as preloadedLanguages } from 'mastodon/initial_state'; import type { RootState } from 'mastodon/store'; import { useAppSelector, useAppDispatch } from 'mastodon/store'; -import { debouncedGuess } from '../util/language_detection'; +import { debouncedGuess, countLetters } from '../util/language_detection'; const messages = defineMessages({ changeLanguage: { @@ -375,12 +375,25 @@ export const LanguageDropdown: React.FC = () => { ); useEffect(() => { - if (text.length > 20) { - debouncedGuess(text, setGuess); + let canceled = false; + + if (countLetters(text) >= 5) { + debouncedGuess(text) + .then((lang) => { + if (!canceled) { + setGuess(lang ?? 
''); + } + }) + .catch(() => { + setGuess(''); + }); } else { - debouncedGuess.cancel(); setGuess(''); } + + return () => { + canceled = true; + }; }, [text, setGuess]); return ( diff --git a/app/javascript/mastodon/features/compose/util/language_detection.d.ts b/app/javascript/mastodon/features/compose/util/language_detection.d.ts new file mode 100644 index 00000000000..289c0476b85 --- /dev/null +++ b/app/javascript/mastodon/features/compose/util/language_detection.d.ts @@ -0,0 +1,5 @@ +export declare const debouncedGuess: ( + text: string, +) => Promise; + +export declare const countLetters: (text: string) => number; diff --git a/app/javascript/mastodon/features/compose/util/language_detection.js b/app/javascript/mastodon/features/compose/util/language_detection.js index ed22a2bd9ca..531cb6f632b 100644 --- a/app/javascript/mastodon/features/compose/util/language_detection.js +++ b/app/javascript/mastodon/features/compose/util/language_detection.js @@ -1,7 +1,5 @@ -import lande from 'lande'; -import { debounce } from 'lodash'; - -import { urlRegex } from './url_regex'; +const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis; +let languageDetectorSupportedAndReady = languageDetectorInGlobalThis && await globalThis.LanguageDetector.availability() === 'available'; const ISO_639_MAP = { afr: 'af', // Afrikaans @@ -56,21 +54,23 @@ const ISO_639_MAP = { vie: 'vi', // Vietnamese }; -const guessLanguage = (text) => { - text = text - .replace(urlRegex, '') - .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, ''); - - if (text.length > 20) { - const [lang, confidence] = lande(text)[0]; - - if (confidence > 0.8) - return ISO_639_MAP[lang]; - } - - return ''; +const countLetters = (text) => { + const segmenter = new Intl.Segmenter('und', { granularity: 'grapheme' }) + const letters = [...segmenter.segment(text)] + return letters.length }; -export const debouncedGuess = debounce((text, setGuess) => { - setGuess(guessLanguage(text)); -}, 500, { maxWait: 
1500, leading: true, trailing: true }); +let module; +// If the API is supported, but the model not loaded yet… +if (languageDetectorInGlobalThis) { + if (!languageDetectorSupportedAndReady) { + // …trigger the model download + self.LanguageDetector.create(); + } + module = await import('./language_detection_with_languagedetector'); +} else { + module = await import('./language_detection_with_laude'); +} +const debouncedGuess = module.debouncedGuess; + +export { debouncedGuess, countLetters, ISO_639_MAP }; diff --git a/app/javascript/mastodon/features/compose/util/language_detection_with_languagedetector.js b/app/javascript/mastodon/features/compose/util/language_detection_with_languagedetector.js new file mode 100644 index 00000000000..3d98f236eb4 --- /dev/null +++ b/app/javascript/mastodon/features/compose/util/language_detection_with_languagedetector.js @@ -0,0 +1,50 @@ +import { debounce } from 'lodash'; + +import { countLetters } from './language_detection'; +import { urlRegex } from './url_regex'; + +const guessLanguage = async (text) => { + text = text + .replace(urlRegex, '') + .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, ''); + + if (countLetters(text) > 5) { + try { + const languageDetector = await self.LanguageDetector.create(); + let {detectedLanguage, confidence} = (await languageDetector.detect(text))[0]; + if (confidence > 0.8) { + detectedLanguage = detectedLanguage.split('-')[0]; + return detectedLanguage; + } + } catch { + return ''; + } + } + + return ''; +}; + +const debouncedGuess = (() => { + let resolver = null; + let rejecter = null; + + const debounced = debounce(async (text) => { + try { + const result = await guessLanguage(text); + if (resolver) { + resolver(result); + resolver = null; + } + } catch { + rejecter(''); + } + }, 500, { maxWait: 1500, leading: true, trailing: true }); + + return (text) => new Promise((resolve, reject) => { + resolver = resolve; + rejecter = reject; + debounced(text); + }); +})(); + +export { 
debouncedGuess }; diff --git a/app/javascript/mastodon/features/compose/util/language_detection_with_laude.js b/app/javascript/mastodon/features/compose/util/language_detection_with_laude.js new file mode 100644 index 00000000000..aa6e768a2c5 --- /dev/null +++ b/app/javascript/mastodon/features/compose/util/language_detection_with_laude.js @@ -0,0 +1,38 @@ +import lande from 'lande'; +import { debounce } from 'lodash'; + +import { countLetters, ISO_639_MAP } from './language_detection'; +import { urlRegex } from './url_regex'; + +const guessLanguage = (text) => { + text = text + .replace(urlRegex, '') + .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, ''); + + if (countLetters(text) > 20) { + const [lang, confidence] = lande(text)[0]; + if (confidence > 0.8) + return ISO_639_MAP[lang]; + } + + return ''; +}; + +const debouncedGuess = (() => { + let resolver = null; + + const debounced = debounce((text) => { + const result = guessLanguage(text); + if (resolver) { + resolver(result); + resolver = null; + } + }, 500, { maxWait: 1500, leading: true, trailing: true }); + + return (text) => new Promise((resolve) => { + resolver = resolve; + debounced(text); + }); +})(); + +export { debouncedGuess };