Restructure to use lande or LanguageDetector

This commit is contained in:
Thomas Steiner 2025-07-11 17:26:38 +02:00
parent 3e1e6762aa
commit bac259debf
6 changed files with 138 additions and 62 deletions

View File

@ -10,8 +10,6 @@ import ImmutablePureComponent from 'react-immutable-pure-component';
import { length } from 'stringz';
import debounce from 'lodash.debounce';
import { missingAltTextModal } from 'mastodon/initial_state';
import { changeComposeLanguage } from 'mastodon/actions/compose';
@ -24,6 +22,7 @@ import PrivacyDropdownContainer from '../containers/privacy_dropdown_container';
import SpoilerButtonContainer from '../containers/spoiler_button_container';
import UploadButtonContainer from '../containers/upload_button_container';
import { countableText } from '../util/counter';
import { debouncedGuess, countLetters } from '../util/language_detection';
import { CharacterCounter } from './character_counter';
import { EditIndicator } from './edit_indicator';
@ -50,24 +49,6 @@ const mapStateToProps = (state) => ({
currentLanguage: state.meta.get('locale'),
});
// Feature detection for the built-in LanguageDetector API, evaluated once at module load.
const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis;
// True only when the API exists and availability() already resolves to 'available'.
let supportsLanguageDetector = languageDetectorInGlobalThis && await globalThis.LanguageDetector.availability() === 'available';
// Detector instance; stays undefined until a create() call has resolved.
let languageDetector;
// If the API is supported, but the model is not loaded yet…
if (languageDetectorInGlobalThis && !supportsLanguageDetector) {
  // …trigger the model download in the background and flip the flag once the detector exists.
  globalThis.LanguageDetector.create()
    .then((_languageDetector) => {
      supportsLanguageDetector = true;
      languageDetector = _languageDetector;
    })
    .catch(() => {
      // Best effort: when the detector cannot be created, supportsLanguageDetector
      // simply stays false instead of the failure becoming an unhandled rejection.
    });
}
/**
 * Counts the grapheme clusters in `text`, so that e.g. an emoji or a letter
 * followed by a combining mark counts once.
 *
 * @param text The string to measure.
 * @returns The number of grapheme segments reported by Intl.Segmenter.
 */
function countLetters(text) {
  const graphemes = new Intl.Segmenter('und', { granularity: 'grapheme' }).segment(text);
  let total = 0;
  for (const _grapheme of graphemes) {
    total += 1;
  }
  return total;
}
class ComposeForm extends ImmutablePureComponent {
static propTypes = {
intl: PropTypes.object.isRequired,
@ -112,7 +93,6 @@ class ComposeForm extends ImmutablePureComponent {
constructor(props) {
super(props);
this.textareaRef = createRef(null);
this.debouncedHandleKeyUp = debounce(this._handleKeyUp.bind(this), 500);
}
handleChange = (e) => {
@ -125,30 +105,21 @@ class ComposeForm extends ImmutablePureComponent {
}
};
handleKeyUp = (e) => {
this.debouncedHandleKeyUp(e);
}
_handleKeyUp = async (e) => {
if (!supportsLanguageDetector) {
return;
}
if (!languageDetector) {
languageDetector = await globalThis.LanguageDetector.create();
}
handleKeyUp = async (e) => {
const text = this.getFulltextForCharacterCounting().trim();
const currentLanguage = this.props.currentLanguage;
if (!text || countLetters(text) <= 5) {
this.props.dispatch(changeComposeLanguage(currentLanguage));
return;
}
try {
let detectedLanguage = (await languageDetector.detect(text))[0].detectedLanguage
detectedLanguage = detectedLanguage === 'und' ? currentLanguage : detectedLanguage.substring(0, 2);
let detectedLanguage = await debouncedGuess(text);
if (!detectedLanguage) {
this.props.dispatch(changeComposeLanguage(currentLanguage));
return;
}
this.props.dispatch(changeComposeLanguage(detectedLanguage));
}
catch {
} catch {
this.props.dispatch(changeComposeLanguage(currentLanguage));
}
}
@ -218,7 +189,6 @@ class ComposeForm extends ImmutablePureComponent {
componentWillUnmount () {
if (this.timeout) clearTimeout(this.timeout);
this.debouncedHandleKeyUp.cancel();
}
componentDidUpdate (prevProps) {

View File

@ -20,7 +20,7 @@ import { languages as preloadedLanguages } from 'mastodon/initial_state';
import type { RootState } from 'mastodon/store';
import { useAppSelector, useAppDispatch } from 'mastodon/store';
import { debouncedGuess } from '../util/language_detection';
import { debouncedGuess, countLetters } from '../util/language_detection';
const messages = defineMessages({
changeLanguage: {
@ -375,12 +375,25 @@ export const LanguageDropdown: React.FC = () => {
);
useEffect(() => {
if (text.length > 20) {
debouncedGuess(text, setGuess);
let canceled = false;
if (countLetters(text) >= 5) {
debouncedGuess(text)
.then((lang) => {
if (!canceled) {
setGuess(lang ?? '');
}
})
.catch(() => {
setGuess('');
});
} else {
debouncedGuess.cancel();
setGuess('');
}
return () => {
canceled = true;
};
}, [text, setGuess]);
return (

View File

@ -0,0 +1,5 @@
/**
 * Debounced language guess for a piece of text.
 *
 * @param text - The text whose language should be guessed.
 * @returns A promise for the guessed language code; callers treat an empty
 * string or `undefined` as "no guess".
 */
export declare const debouncedGuess: (
text: string,
) => Promise<string | undefined>;
/**
 * Counts the grapheme clusters in `text`.
 *
 * @param text - The string to measure.
 * @returns The number of grapheme segments.
 */
export declare const countLetters: (text: string) => number;

View File

@ -1,7 +1,5 @@
import lande from 'lande';
import { debounce } from 'lodash';
import { urlRegex } from './url_regex';
// Whether the runtime exposes a global `LanguageDetector` at all.
const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis;
// True only when the API exists and availability() already resolves to 'available'.
// Evaluated once, at module load, via top-level await.
let languageDetectorSupportedAndReady = languageDetectorInGlobalThis && await globalThis.LanguageDetector.availability() === 'available';
const ISO_639_MAP = {
afr: 'af', // Afrikaans
@ -56,21 +54,23 @@ const ISO_639_MAP = {
vie: 'vi', // Vietnamese
};
/**
 * Guesses the language of `text` with lande.
 *
 * Matches of `urlRegex` and `@user@domain` mentions are removed first. A guess
 * is only attempted when more than 20 characters remain, and only accepted
 * when lande's top result has a confidence above 0.8.
 *
 * @param text The raw text.
 * @returns The ISO_639_MAP entry for the top language (undefined when the
 * language has no entry), or '' when no confident guess is made.
 */
const guessLanguage = (text) => {
  const stripped = text
    .replace(urlRegex, '')
    .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');

  if (stripped.length <= 20) {
    return '';
  }

  const [lang, confidence] = lande(stripped)[0];
  return confidence > 0.8 ? ISO_639_MAP[lang] : '';
};
/**
 * Counts the grapheme clusters in `text` rather than its UTF-16 code units.
 *
 * @param text The string to measure.
 * @returns The number of grapheme segments reported by Intl.Segmenter.
 */
const countLetters = (text) => {
  const graphemeSegmenter = new Intl.Segmenter('und', { granularity: 'grapheme' });
  return Array.from(graphemeSegmenter.segment(text)).length;
};
/**
 * Debounced guesser: computes the guess for `text` and hands it to `setGuess`.
 * Debounce settings: 500 ms wait, 1500 ms maxWait, leading and trailing edges.
 */
export const debouncedGuess = debounce(
  (text, setGuess) => {
    const guess = guessLanguage(text);
    setGuess(guess);
  },
  500,
  { maxWait: 1500, leading: true, trailing: true },
);
let module;
// If the API is supported, but the model not loaded yet…
if (languageDetectorInGlobalThis) {
if (!languageDetectorSupportedAndReady) {
// …trigger the model download
self.LanguageDetector.create();
}
module = await import('./language_detection_with_languagedetector');
} else {
module = await import('./language_detection_with_laude');
}
const debouncedGuess = module.debouncedGuess;
export { debouncedGuess, countLetters, ISO_639_MAP };

View File

@ -0,0 +1,50 @@
import { debounce } from 'lodash';
import { countLetters } from './language_detection';
import { urlRegex } from './url_regex';
// Shared promise for the LanguageDetector instance, so the detector is created
// once instead of on every guess.
let languageDetectorPromise = null;

/**
 * Returns the shared detector, creating it on first use. A failed creation is
 * forgotten so that a later call can try again.
 */
const getLanguageDetector = () => {
  if (!languageDetectorPromise) {
    languageDetectorPromise = self.LanguageDetector.create().catch((error) => {
      languageDetectorPromise = null;
      throw error;
    });
  }
  return languageDetectorPromise;
};

/**
 * Guesses the language of `text` with the built-in LanguageDetector.
 *
 * Matches of `urlRegex` and `@user@domain` mentions are removed first. A guess
 * is only attempted when more than 5 graphemes remain, and only accepted when
 * the top result has a confidence above 0.8 and is not the undetermined
 * language 'und'.
 *
 * @param text The raw text.
 * @returns A promise for the primary language subtag (e.g. 'en' for 'en-US'),
 * or '' when there is no confident guess or detection fails.
 */
const guessLanguage = async (text) => {
  text = text
    .replace(urlRegex, '')
    .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');

  if (countLetters(text) <= 5) {
    return '';
  }

  try {
    const languageDetector = await getLanguageDetector();
    const [topResult] = await languageDetector.detect(text);
    if (!topResult) {
      return '';
    }

    const { detectedLanguage, confidence } = topResult;
    // 'und' stands for "undetermined" and must not be reported as a language.
    if (confidence > 0.8 && detectedLanguage !== 'und') {
      return detectedLanguage.split('-')[0];
    }
  } catch {
    // Detection is best effort; any failure counts as "no guess".
    return '';
  }

  return '';
};
/**
 * Debounced, promise-returning wrapper around `guessLanguage`
 * (500 ms wait, 1500 ms maxWait, leading and trailing edges).
 *
 * Every call returns a promise that settles with the result of the next
 * debounced invocation, so no caller is left with a promise that never
 * settles and no caller receives the result of an invocation that started
 * before it called.
 *
 * @param text The text whose language should be guessed.
 * @returns A promise for the guessed language code, or '' when there is none.
 *
 * The returned function also exposes `cancel()`, which drops the queued
 * invocation and resolves all waiting promises with ''.
 */
const debouncedGuess = (() => {
  // Settlers of the promises handed out since the last invocation started.
  let pending = [];

  const debounced = debounce(async (text) => {
    // Claim the current waiters; callers arriving while this invocation is
    // still running are served by the next one.
    const waiters = pending;
    pending = [];
    try {
      const result = await guessLanguage(text);
      waiters.forEach(({ resolve }) => resolve(result));
    } catch (error) {
      waiters.forEach(({ reject }) => reject(error));
    }
  }, 500, { maxWait: 1500, leading: true, trailing: true });

  const guess = (text) => new Promise((resolve, reject) => {
    pending.push({ resolve, reject });
    debounced(text);
  });

  guess.cancel = () => {
    debounced.cancel();
    const waiters = pending;
    pending = [];
    waiters.forEach(({ resolve }) => resolve(''));
  };

  return guess;
})();
export { debouncedGuess };

View File

@ -0,0 +1,38 @@
import lande from 'lande';
import { debounce } from 'lodash';
import { countLetters, ISO_639_MAP } from './language_detection';
import { urlRegex } from './url_regex';
/**
 * Guesses the language of `text` with lande.
 *
 * Matches of `urlRegex` and `@user@domain` mentions are removed first. A guess
 * is only attempted when more than 20 graphemes remain, and only accepted when
 * lande's top result has a confidence above 0.8.
 *
 * @param text The raw text.
 * @returns The ISO_639_MAP entry for the top language (undefined when the
 * language has no entry), or '' when no confident guess is made.
 */
const guessLanguage = (text) => {
  const withoutUrls = text.replace(urlRegex, '');
  const cleaned = withoutUrls.replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');

  if (countLetters(cleaned) > 20) {
    const [lang, confidence] = lande(cleaned)[0];
    if (confidence > 0.8) {
      return ISO_639_MAP[lang];
    }
  }

  return '';
};
/**
 * Debounced, promise-returning wrapper around `guessLanguage`
 * (500 ms wait, 1500 ms maxWait, leading and trailing edges).
 *
 * Every call returns a promise that settles with the result of the next
 * debounced invocation, so no caller is left with a promise that never
 * settles.
 *
 * @param text The text whose language should be guessed.
 * @returns A promise for the guessed language code; '' or undefined means
 * there is no guess.
 *
 * The returned function also exposes `cancel()`, which drops the queued
 * invocation and resolves all waiting promises with ''.
 */
const debouncedGuess = (() => {
  // Settlers of the promises handed out since the last invocation started.
  let pending = [];

  const debounced = debounce((text) => {
    // Claim the current waiters; later callers are served by the next invocation.
    const waiters = pending;
    pending = [];

    let result;
    try {
      result = guessLanguage(text);
    } catch (error) {
      waiters.forEach(({ reject }) => reject(error));
      return;
    }
    waiters.forEach(({ resolve }) => resolve(result));
  }, 500, { maxWait: 1500, leading: true, trailing: true });

  const guess = (text) => new Promise((resolve, reject) => {
    pending.push({ resolve, reject });
    debounced(text);
  });

  guess.cancel = () => {
    debounced.cancel();
    const waiters = pending;
    pending = [];
    waiters.forEach(({ resolve }) => resolve(''));
  };

  return guess;
})();
export { debouncedGuess };