mirror of
https://github.com/mastodon/mastodon.git
synced 2025-09-06 01:41:08 +00:00
Restructure to use lande or LanguageDetector
This commit is contained in:
parent
3e1e6762aa
commit
bac259debf
|
@ -10,8 +10,6 @@ import ImmutablePureComponent from 'react-immutable-pure-component';
|
||||||
|
|
||||||
import { length } from 'stringz';
|
import { length } from 'stringz';
|
||||||
|
|
||||||
import debounce from 'lodash.debounce';
|
|
||||||
|
|
||||||
import { missingAltTextModal } from 'mastodon/initial_state';
|
import { missingAltTextModal } from 'mastodon/initial_state';
|
||||||
import { changeComposeLanguage } from 'mastodon/actions/compose';
|
import { changeComposeLanguage } from 'mastodon/actions/compose';
|
||||||
|
|
||||||
|
@ -24,6 +22,7 @@ import PrivacyDropdownContainer from '../containers/privacy_dropdown_container';
|
||||||
import SpoilerButtonContainer from '../containers/spoiler_button_container';
|
import SpoilerButtonContainer from '../containers/spoiler_button_container';
|
||||||
import UploadButtonContainer from '../containers/upload_button_container';
|
import UploadButtonContainer from '../containers/upload_button_container';
|
||||||
import { countableText } from '../util/counter';
|
import { countableText } from '../util/counter';
|
||||||
|
import { debouncedGuess, countLetters } from '../util/language_detection';
|
||||||
|
|
||||||
import { CharacterCounter } from './character_counter';
|
import { CharacterCounter } from './character_counter';
|
||||||
import { EditIndicator } from './edit_indicator';
|
import { EditIndicator } from './edit_indicator';
|
||||||
|
@ -50,24 +49,6 @@ const mapStateToProps = (state) => ({
|
||||||
currentLanguage: state.meta.get('locale'),
|
currentLanguage: state.meta.get('locale'),
|
||||||
});
|
});
|
||||||
|
|
||||||
// Feature detection for the browser's built-in LanguageDetector API.
const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis;

// True once the API exists and reports (or has produced) a usable detector.
let supportsLanguageDetector = false;
// Detector instance, filled in once a create() call has succeeded.
let languageDetector;

if (languageDetectorInGlobalThis) {
  try {
    supportsLanguageDetector =
      (await globalThis.LanguageDetector.availability()) === 'available';
  } catch {
    // A failing availability check must not break loading of this module;
    // treat the API as not ready instead.
    supportsLanguageDetector = false;
  }

  // If the API is supported, but the model is not loaded yet…
  if (!supportsLanguageDetector) {
    // …trigger the model download
    globalThis.LanguageDetector.create()
      .then((createdDetector) => {
        languageDetector = createdDetector;
        supportsLanguageDetector = true;
      })
      .catch(() => {
        // Best effort: if the detector cannot be created, detection simply
        // stays disabled rather than surfacing an unhandled rejection.
      });
  }
}
|
|
||||||
|
|
||||||
// Shared segmenter: it is configured identically for every call, so it is
// built once instead of on each invocation.
const graphemeSegmenter = new Intl.Segmenter('und', { granularity: 'grapheme' });

/**
 * Counts the grapheme clusters (user-perceived characters) in `text`.
 *
 * Despite the name, every grapheme is counted, not only letters: whitespace,
 * punctuation and emoji each contribute one.
 *
 * @param {string} text - Text to measure.
 * @returns {number} Number of grapheme clusters in `text`.
 */
function countLetters(text) {
  return [...graphemeSegmenter.segment(text)].length;
}
|
|
||||||
|
|
||||||
class ComposeForm extends ImmutablePureComponent {
|
class ComposeForm extends ImmutablePureComponent {
|
||||||
static propTypes = {
|
static propTypes = {
|
||||||
intl: PropTypes.object.isRequired,
|
intl: PropTypes.object.isRequired,
|
||||||
|
@ -112,7 +93,6 @@ class ComposeForm extends ImmutablePureComponent {
|
||||||
constructor(props) {
|
constructor(props) {
|
||||||
super(props);
|
super(props);
|
||||||
this.textareaRef = createRef(null);
|
this.textareaRef = createRef(null);
|
||||||
this.debouncedHandleKeyUp = debounce(this._handleKeyUp.bind(this), 500);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
handleChange = (e) => {
|
handleChange = (e) => {
|
||||||
|
@ -125,30 +105,21 @@ class ComposeForm extends ImmutablePureComponent {
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
handleKeyUp = (e) => {
|
handleKeyUp = async (e) => {
|
||||||
this.debouncedHandleKeyUp(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
_handleKeyUp = async (e) => {
|
|
||||||
if (!supportsLanguageDetector) {
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (!languageDetector) {
|
|
||||||
languageDetector = await globalThis.LanguageDetector.create();
|
|
||||||
}
|
|
||||||
const text = this.getFulltextForCharacterCounting().trim();
|
const text = this.getFulltextForCharacterCounting().trim();
|
||||||
const currentLanguage = this.props.currentLanguage;
|
const currentLanguage = this.props.currentLanguage;
|
||||||
if (!text || countLetters(text) <= 5) {
|
if (!text || countLetters(text) <= 5) {
|
||||||
this.props.dispatch(changeComposeLanguage(currentLanguage));
|
this.props.dispatch(changeComposeLanguage(currentLanguage));
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
let detectedLanguage = (await languageDetector.detect(text))[0].detectedLanguage
|
let detectedLanguage = await debouncedGuess(text);
|
||||||
detectedLanguage = detectedLanguage === 'und' ? currentLanguage : detectedLanguage.substring(0, 2);
|
if (!detectedLanguage) {
|
||||||
|
this.props.dispatch(changeComposeLanguage(currentLanguage));
|
||||||
|
return;
|
||||||
|
}
|
||||||
this.props.dispatch(changeComposeLanguage(detectedLanguage));
|
this.props.dispatch(changeComposeLanguage(detectedLanguage));
|
||||||
}
|
} catch {
|
||||||
catch {
|
|
||||||
this.props.dispatch(changeComposeLanguage(currentLanguage));
|
this.props.dispatch(changeComposeLanguage(currentLanguage));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -218,7 +189,6 @@ class ComposeForm extends ImmutablePureComponent {
|
||||||
|
|
||||||
componentWillUnmount () {
|
componentWillUnmount () {
|
||||||
if (this.timeout) clearTimeout(this.timeout);
|
if (this.timeout) clearTimeout(this.timeout);
|
||||||
this.debouncedHandleKeyUp.cancel();
|
|
||||||
}
|
}
|
||||||
|
|
||||||
componentDidUpdate (prevProps) {
|
componentDidUpdate (prevProps) {
|
||||||
|
|
|
@ -20,7 +20,7 @@ import { languages as preloadedLanguages } from 'mastodon/initial_state';
|
||||||
import type { RootState } from 'mastodon/store';
|
import type { RootState } from 'mastodon/store';
|
||||||
import { useAppSelector, useAppDispatch } from 'mastodon/store';
|
import { useAppSelector, useAppDispatch } from 'mastodon/store';
|
||||||
|
|
||||||
import { debouncedGuess } from '../util/language_detection';
|
import { debouncedGuess, countLetters } from '../util/language_detection';
|
||||||
|
|
||||||
const messages = defineMessages({
|
const messages = defineMessages({
|
||||||
changeLanguage: {
|
changeLanguage: {
|
||||||
|
@ -375,12 +375,25 @@ export const LanguageDropdown: React.FC = () => {
|
||||||
);
|
);
|
||||||
|
|
||||||
useEffect(() => {
|
useEffect(() => {
|
||||||
if (text.length > 20) {
|
let canceled = false;
|
||||||
debouncedGuess(text, setGuess);
|
|
||||||
|
if (countLetters(text) >= 5) {
|
||||||
|
debouncedGuess(text)
|
||||||
|
.then((lang) => {
|
||||||
|
if (!canceled) {
|
||||||
|
setGuess(lang ?? '');
|
||||||
|
}
|
||||||
|
})
|
||||||
|
.catch(() => {
|
||||||
|
setGuess('');
|
||||||
|
});
|
||||||
} else {
|
} else {
|
||||||
debouncedGuess.cancel();
|
|
||||||
setGuess('');
|
setGuess('');
|
||||||
}
|
}
|
||||||
|
|
||||||
|
return () => {
|
||||||
|
canceled = true;
|
||||||
|
};
|
||||||
}, [text, setGuess]);
|
}, [text, setGuess]);
|
||||||
|
|
||||||
return (
|
return (
|
||||||
|
|
5
app/javascript/mastodon/features/compose/util/language_detection.d.ts
vendored
Normal file
5
app/javascript/mastodon/features/compose/util/language_detection.d.ts
vendored
Normal file
|
@ -0,0 +1,5 @@
|
||||||
|
export declare const debouncedGuess: (
|
||||||
|
text: string,
|
||||||
|
) => Promise<string | undefined>;
|
||||||
|
|
||||||
|
export declare const countLetters: (text: string) => number;
|
|
@ -1,7 +1,5 @@
|
||||||
import lande from 'lande';
|
const languageDetectorInGlobalThis = 'LanguageDetector' in globalThis;
|
||||||
import { debounce } from 'lodash';
|
let languageDetectorSupportedAndReady = languageDetectorInGlobalThis && await globalThis.LanguageDetector.availability() === 'available';
|
||||||
|
|
||||||
import { urlRegex } from './url_regex';
|
|
||||||
|
|
||||||
const ISO_639_MAP = {
|
const ISO_639_MAP = {
|
||||||
afr: 'af', // Afrikaans
|
afr: 'af', // Afrikaans
|
||||||
|
@ -56,21 +54,23 @@ const ISO_639_MAP = {
|
||||||
vie: 'vi', // Vietnamese
|
vie: 'vi', // Vietnamese
|
||||||
};
|
};
|
||||||
|
|
||||||
const guessLanguage = (text) => {
|
const countLetters = (text) => {
|
||||||
text = text
|
const segmenter = new Intl.Segmenter('und', { granularity: 'grapheme' })
|
||||||
.replace(urlRegex, '')
|
const letters = [...segmenter.segment(text)]
|
||||||
.replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');
|
return letters.length
|
||||||
|
|
||||||
if (text.length > 20) {
|
|
||||||
const [lang, confidence] = lande(text)[0];
|
|
||||||
|
|
||||||
if (confidence > 0.8)
|
|
||||||
return ISO_639_MAP[lang];
|
|
||||||
}
|
|
||||||
|
|
||||||
return '';
|
|
||||||
};
|
};
|
||||||
|
|
||||||
export const debouncedGuess = debounce((text, setGuess) => {
|
let module;
|
||||||
setGuess(guessLanguage(text));
|
// If the API is supported, but the model not loaded yet…
|
||||||
}, 500, { maxWait: 1500, leading: true, trailing: true });
|
if (languageDetectorInGlobalThis) {
|
||||||
|
if (!languageDetectorSupportedAndReady) {
|
||||||
|
// …trigger the model download
|
||||||
|
self.LanguageDetector.create();
|
||||||
|
}
|
||||||
|
module = await import('./language_detection_with_languagedetector');
|
||||||
|
} else {
|
||||||
|
module = await import('./language_detection_with_laude');
|
||||||
|
}
|
||||||
|
const debouncedGuess = module.debouncedGuess;
|
||||||
|
|
||||||
|
export { debouncedGuess, countLetters, ISO_639_MAP };
|
||||||
|
|
|
@ -0,0 +1,50 @@
|
||||||
|
import { debounce } from 'lodash';
|
||||||
|
|
||||||
|
import { countLetters } from './language_detection';
|
||||||
|
import { urlRegex } from './url_regex';
|
||||||
|
|
||||||
|
// Promise for the LanguageDetector shared by all guesses, so that a detector
// is not created again for every single call.
let languageDetectorPromise = null;

/**
 * Returns the shared LanguageDetector, creating it on first use.
 * A failed creation is not cached, so a later guess can try again.
 */
const getLanguageDetector = () => {
  if (!languageDetectorPromise) {
    languageDetectorPromise = globalThis.LanguageDetector.create().catch((error) => {
      languageDetectorPromise = null;
      throw error;
    });
  }

  return languageDetectorPromise;
};

/**
 * Guesses the language of `text` using the browser's LanguageDetector API.
 *
 * URLs and remote mentions (`@user@domain`) are stripped first. A guess is
 * only attempted when more than 5 graphemes remain, and only accepted when
 * the top result has a confidence above 0.8 and is not the undetermined
 * language (`und`).
 *
 * NOTE(review): this module imports `countLetters` from './language_detection',
 * which itself loads this module through a top-level `await import(...)`;
 * confirm that this import cycle cannot stall module evaluation.
 *
 * @param {string} text - Raw compose text.
 * @returns {Promise<string>} Primary language subtag (e.g. `de` for `de-DE`),
 *   or `''` when no confident guess is available or detection fails.
 */
const guessLanguage = async (text) => {
  const cleanedText = text
    .replace(urlRegex, '')
    .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');

  if (!(countLetters(cleanedText) > 5)) {
    return '';
  }

  try {
    const languageDetector = await getLanguageDetector();
    const [topResult] = await languageDetector.detect(cleanedText);

    // No result, an undetermined language, or a weak guess all mean "no guess".
    if (
      !topResult ||
      topResult.detectedLanguage === 'und' ||
      !(topResult.confidence > 0.8)
    ) {
      return '';
    }

    // Keep only the primary subtag, dropping any region/script suffix.
    return topResult.detectedLanguage.split('-')[0];
  } catch {
    // Detection is best effort: any failure is reported as "no guess".
    return '';
  }
};
|
||||||
|
|
||||||
|
/**
 * Debounced, promise-returning wrapper around `guessLanguage`.
 *
 * Calls are coalesced by lodash's `debounce` (500 ms wait, 1500 ms max wait,
 * leading and trailing edge). Every caller gets a promise that settles with
 * the outcome of the debounced run that covers its call, so callers whose
 * text was superseded by a newer call receive the guess for the newer text
 * instead of waiting forever.
 *
 * @param {string} text - Raw compose text.
 * @returns {Promise<string>} The guessed language code, or `''` for no guess.
 *   Rejects with the underlying error if `guessLanguage` rejects.
 */
const debouncedGuess = (() => {
  // resolve/reject pairs of all callers not yet claimed by a debounced run.
  let waiting = [];

  const debounced = debounce(async (text) => {
    // Claim the waiters before awaiting: callers arriving while detection is
    // in flight belong to the next run and must not get this run's result.
    const claimed = waiting;
    waiting = [];

    try {
      const result = await guessLanguage(text);
      claimed.forEach(({ resolve }) => resolve(result));
    } catch (error) {
      claimed.forEach(({ reject }) => reject(error));
    }
  }, 500, { maxWait: 1500, leading: true, trailing: true });

  return (text) => new Promise((resolve, reject) => {
    // Register first: on the leading edge the debounced function runs
    // synchronously inside this call and must already see this caller.
    waiting.push({ resolve, reject });
    debounced(text);
  });
})();
|
||||||
|
|
||||||
|
export { debouncedGuess };
|
|
@ -0,0 +1,38 @@
|
||||||
|
import lande from 'lande';
|
||||||
|
import { debounce } from 'lodash';
|
||||||
|
|
||||||
|
import { countLetters, ISO_639_MAP } from './language_detection';
|
||||||
|
import { urlRegex } from './url_regex';
|
||||||
|
|
||||||
|
/**
 * Guesses the language of `text` using the `lande` library.
 *
 * URLs and remote mentions (`@user@domain`) are stripped first. A guess is
 * only attempted when more than 20 graphemes remain, and only accepted when
 * lande's top candidate has a confidence above 0.8 and its code has an entry
 * in `ISO_639_MAP`.
 *
 * NOTE(review): this module imports from './language_detection', which itself
 * loads this module through a top-level `await import(...)`; confirm that
 * this import cycle cannot stall module evaluation.
 *
 * @param {string} text - Raw compose text.
 * @returns {string} The mapped two-letter language code, or `''` when there
 *   is no confident, mappable guess.
 */
const guessLanguage = (text) => {
  const cleanedText = text
    .replace(urlRegex, '')
    .replace(/(^|[^/\w])@(([a-z0-9_]+)@[a-z0-9.-]+[a-z0-9]+)/ig, '');

  if (!(countLetters(cleanedText) > 20)) {
    return '';
  }

  // Guard against an empty ranking instead of destructuring `undefined`.
  const [topCandidate] = lande(cleanedText);
  if (!topCandidate) {
    return '';
  }

  const [lang, confidence] = topCandidate;
  if (!(confidence > 0.8)) {
    return '';
  }

  // Codes without a mapping count as "no guess", like every other miss.
  return ISO_639_MAP[lang] ?? '';
};
|
||||||
|
|
||||||
|
/**
 * Debounced, promise-returning wrapper around `guessLanguage`.
 *
 * Calls are coalesced by lodash's `debounce` (500 ms wait, 1500 ms max wait,
 * leading and trailing edge). Every caller gets a promise that settles with
 * the outcome of the debounced run that covers its call, so callers whose
 * text was superseded by a newer call receive the guess for the newer text
 * instead of waiting forever.
 *
 * @param {string} text - Raw compose text.
 * @returns {Promise<string>} The guessed language code, or `''` for no guess.
 *   Rejects with the underlying error if `guessLanguage` throws.
 */
const debouncedGuess = (() => {
  // resolve/reject pairs of all callers not yet claimed by a debounced run.
  let waiting = [];

  const debounced = debounce((text) => {
    const claimed = waiting;
    waiting = [];

    let result;
    try {
      result = guessLanguage(text);
    } catch (error) {
      // A throw on the trailing edge would otherwise escape from a timer
      // callback and leave the callers' promises pending.
      claimed.forEach(({ reject }) => reject(error));
      return;
    }

    claimed.forEach(({ resolve }) => resolve(result));
  }, 500, { maxWait: 1500, leading: true, trailing: true });

  return (text) => new Promise((resolve, reject) => {
    // Register first: on the leading edge the debounced function runs
    // synchronously inside this call and must already see this caller.
    waiting.push({ resolve, reject });
    debounced(text);
  });
})();
|
||||||
|
|
||||||
|
export { debouncedGuess };
|
Loading…
Reference in New Issue
Block a user