buggy Arabic diacritics removal

This commit is contained in:
Simon Cambier 2025-07-11 18:57:07 +02:00
parent 92a7af6ec5
commit 921a72b015

View File

@@ -9,6 +9,7 @@ import {
import { md5 } from 'pure-md5'
import { eng, fra } from 'stopword'
import { isSearchMatch, type SearchMatch } from '../globals'
import { settings } from 'src/settings'
export function pathWithoutFilename(path: string): string {
const split = path.split('/')
@@ -121,11 +122,12 @@ export function removeDiacritics(str: string, arabic = true): string {
return ''
}
// TODO: add a global setting to toggle this, this impacts performances
if (arabic) {
// Arabic diacritics
// https://stackoverflow.com/a/40959537
str = str
.replace(/([^\u0621-\u063A\u0641-\u064A\u0660-\u0669a-zA-Z 0-9])/g, '')
// .replace(/([^\u0621-\u063A\u0641-\u064A\u0660-\u0669a-zA-Z 0-9])/g, '')
.replace(/(آ|إ|أ)/g, 'ا')
.replace(/(ة)/g, 'ه')
.replace(/(ئ|ؤ)/g, 'ء')
@@ -144,6 +146,7 @@ export function removeDiacritics(str: string, arabic = true): string {
str = str.normalize('NFD').replace(diacriticsRegex, '').normalize('NFC')
str = str.replaceAll('[__locator__backtick__]', '`')
str = str.replaceAll('[__locator__caret__]', '^')
return str
}