Always remove diacritics

This commit is contained in:
Simon Cambier 2025-06-21 13:29:37 +02:00
parent 42df9465cf
commit f0c16d5905
9 changed files with 24 additions and 72 deletions

View File

@ -11,10 +11,7 @@
import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView, Platform } from 'obsidian'
import ModalContainer from './ModalContainer.svelte'
import {
LocatorInFileModal,
LocatorVaultModal,
} from '../components/modals'
import { LocatorInFileModal, LocatorVaultModal } from '../components/modals'
import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from '../search/query'
import { openNote } from '../tools/notes'
@ -51,8 +48,8 @@
$: (async () => {
if (searchQuery) {
query = new Query(searchQuery, {
ignoreDiacritics: plugin.settings.ignoreDiacritics,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics,
ignoreDiacritics: true,
ignoreArabicDiacritics: true,
})
note =
(
@ -163,13 +160,13 @@
</script>
<InputSearch
plugin="{plugin}"
on:input="{e => (searchQuery = e.detail)}"
{plugin}
on:input={e => (searchQuery = e.detail)}
placeholder="Locator - File"
initialValue="{previousQuery}">
initialValue={previousQuery}>
<div class="omnisearch-input-container__buttons">
{#if Platform.isMobile}
<button on:click="{switchToVaultModal}">Vault search</button>
<button on:click={switchToVaultModal}>Vault search</button>
{/if}
</div>
</InputSearch>
@ -179,15 +176,15 @@
{#each groupedOffsets as offset, i}
<ResultItemInFile
{plugin}
offset="{offset}"
note="{note}"
index="{i}"
selected="{i === selectedIndex}"
on:mousemove="{_e => (selectedIndex = i)}"
on:click="{evt => openSelection(evt.ctrlKey)}"
on:auxclick="{evt => {
{offset}
{note}
index={i}
selected={i === selectedIndex}
on:mousemove={_e => (selectedIndex = i)}
on:click={evt => openSelection(evt.ctrlKey)}
on:auxclick={evt => {
if (evt.button == 1) openSelection(true)
}}" />
}} />
{/each}
{:else}
<div style="text-align: center;">

View File

@ -149,8 +149,8 @@
cancelableQuery = null
}
query = new Query(searchQuery, {
ignoreDiacritics: plugin.settings.ignoreDiacritics,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics,
ignoreDiacritics: true,
ignoreArabicDiacritics: true,
})
cancelableQuery = cancelable(
new Promise(resolve => {

View File

@ -331,9 +331,7 @@ export class SearchEngine {
results.map(async result => {
const doc = await this.plugin.documentsRepository.getDocument(result.id)
if (!doc) {
console.warn(
`Locator - Note "${result.id}" not in the live cache`
)
console.warn(`Locator - Note "${result.id}" not in the live cache`)
countError(true)
}
return doc
@ -349,12 +347,7 @@ export class SearchEngine {
const title = document?.path.toLowerCase() ?? ''
const content = (document?.cleanedContent ?? '').toLowerCase()
return exactTerms.every(
q =>
content.includes(q) ||
removeDiacritics(
title,
this.plugin.settings.ignoreArabicDiacritics
).includes(q)
q => content.includes(q) || removeDiacritics(title).includes(q)
)
})
}
@ -524,11 +517,7 @@ export class SearchEngine {
}
return (doc as any)[fieldName]
},
processTerm: (term: string) =>
(this.plugin.settings.ignoreDiacritics
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
: term
).toLowerCase(),
processTerm: (term: string) => removeDiacritics(term).toLowerCase(),
idField: 'path',
fields: [
'basename',

View File

@ -100,8 +100,6 @@ export function getDefaultSettings(app: App): LocatorSettings {
hideExcluded: false,
recencyBoost: RecencyCutoff.Disabled,
downrankedFoldersFilters: [] as string[],
ignoreDiacritics: true,
ignoreArabicDiacritics: false,
indexedFileTypes: [] as string[],
displayTitle: '',
PDFIndexing: false,

View File

@ -15,33 +15,6 @@ export function injectSettingsDanger(
new Setting(containerEl).setName('Danger Zone').setHeading()
// Ignore diacritics
new Setting(containerEl)
.setName('Ignore diacritics')
.setDesc(
htmlDescription(`Normalize diacritics in search terms. Words like "brûlée" or "žluťoučký" will be indexed as "brulee" and "zlutoucky".<br/>
<span style="color: var(--text-accent)">You probably should <strong>NOT</strong> disable this.</span><br>
<span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
${needsARestart}`)
)
.addToggle(toggle =>
toggle.setValue(settings.ignoreDiacritics).onChange(async v => {
await database.clearCache()
settings.ignoreDiacritics = v
await saveSettings(plugin)
})
)
new Setting(containerEl)
.setName('Ignore Arabic diacritics (beta)')
.addToggle(toggle =>
toggle.setValue(settings.ignoreArabicDiacritics).onChange(async v => {
await database.clearCache()
settings.ignoreArabicDiacritics = v
await saveSettings(plugin)
})
)
// Disable Locator
const disableDesc = new DocumentFragment()
disableDesc.createSpan({}, span => {

View File

@ -41,9 +41,6 @@ export interface LocatorSettings extends WeightingSettings {
recencyBoost: RecencyCutoff
/** downrank files in the given folders */
downrankedFoldersFilters: string[]
/** Ignore diacritics when indexing files */
ignoreDiacritics: boolean
ignoreArabicDiacritics: boolean
/** Extensions of plain text files to index, in addition to .md */
indexedFileTypes: string[]

View File

@ -87,8 +87,8 @@ export function getApi(plugin: LocatorPlugin) {
return {
async search(q: string): Promise<ResultNoteApi[]> {
const query = new Query(q, {
ignoreDiacritics: plugin.settings.ignoreDiacritics,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics,
ignoreDiacritics: true,
ignoreArabicDiacritics: true,
})
const raw = await plugin.searchEngine.getSuggestions(query)
return mapResults(plugin, raw)

View File

@ -68,9 +68,7 @@ export class TextProcessor {
const reg = this.stringsToRegex(words)
const originalText = text
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
if (this.plugin.settings.ignoreDiacritics) {
text = removeDiacritics(text, this.plugin.settings.ignoreArabicDiacritics)
}
text = removeDiacritics(text)
const startTime = new Date().getTime()
let match: RegExpExecArray | null = null
let matches: SearchMatch[] = []

View File

@ -115,7 +115,7 @@ const diacriticsRegex = new RegExp(`(?!${regexpExclude})\\p{Diacritic}`, 'gu')
/**
* https://stackoverflow.com/a/37511463
*/
export function removeDiacritics(str: string, arabic = false): string {
export function removeDiacritics(str: string, arabic = true): string {
if (str === null || str === undefined) {
return ''
}