Always remove diacritics

This commit is contained in:
Simon Cambier 2025-06-21 13:29:37 +02:00
parent 42df9465cf
commit f0c16d5905
9 changed files with 24 additions and 72 deletions

View File

@ -11,10 +11,7 @@
import { onDestroy, onMount, tick } from 'svelte' import { onDestroy, onMount, tick } from 'svelte'
import { MarkdownView, Platform } from 'obsidian' import { MarkdownView, Platform } from 'obsidian'
import ModalContainer from './ModalContainer.svelte' import ModalContainer from './ModalContainer.svelte'
import { import { LocatorInFileModal, LocatorVaultModal } from '../components/modals'
LocatorInFileModal,
LocatorVaultModal,
} from '../components/modals'
import ResultItemInFile from './ResultItemInFile.svelte' import ResultItemInFile from './ResultItemInFile.svelte'
import { Query } from '../search/query' import { Query } from '../search/query'
import { openNote } from '../tools/notes' import { openNote } from '../tools/notes'
@ -51,8 +48,8 @@
$: (async () => { $: (async () => {
if (searchQuery) { if (searchQuery) {
query = new Query(searchQuery, { query = new Query(searchQuery, {
ignoreDiacritics: plugin.settings.ignoreDiacritics, ignoreDiacritics: true,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics, ignoreArabicDiacritics: true,
}) })
note = note =
( (
@ -163,13 +160,13 @@
</script> </script>
<InputSearch <InputSearch
plugin="{plugin}" {plugin}
on:input="{e => (searchQuery = e.detail)}" on:input={e => (searchQuery = e.detail)}
placeholder="Locator - File" placeholder="Locator - File"
initialValue="{previousQuery}"> initialValue={previousQuery}>
<div class="omnisearch-input-container__buttons"> <div class="omnisearch-input-container__buttons">
{#if Platform.isMobile} {#if Platform.isMobile}
<button on:click="{switchToVaultModal}">Vault search</button> <button on:click={switchToVaultModal}>Vault search</button>
{/if} {/if}
</div> </div>
</InputSearch> </InputSearch>
@ -179,15 +176,15 @@
{#each groupedOffsets as offset, i} {#each groupedOffsets as offset, i}
<ResultItemInFile <ResultItemInFile
{plugin} {plugin}
offset="{offset}" {offset}
note="{note}" {note}
index="{i}" index={i}
selected="{i === selectedIndex}" selected={i === selectedIndex}
on:mousemove="{_e => (selectedIndex = i)}" on:mousemove={_e => (selectedIndex = i)}
on:click="{evt => openSelection(evt.ctrlKey)}" on:click={evt => openSelection(evt.ctrlKey)}
on:auxclick="{evt => { on:auxclick={evt => {
if (evt.button == 1) openSelection(true) if (evt.button == 1) openSelection(true)
}}" /> }} />
{/each} {/each}
{:else} {:else}
<div style="text-align: center;"> <div style="text-align: center;">

View File

@ -149,8 +149,8 @@
cancelableQuery = null cancelableQuery = null
} }
query = new Query(searchQuery, { query = new Query(searchQuery, {
ignoreDiacritics: plugin.settings.ignoreDiacritics, ignoreDiacritics: true,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics, ignoreArabicDiacritics: true,
}) })
cancelableQuery = cancelable( cancelableQuery = cancelable(
new Promise(resolve => { new Promise(resolve => {

View File

@ -331,9 +331,7 @@ export class SearchEngine {
results.map(async result => { results.map(async result => {
const doc = await this.plugin.documentsRepository.getDocument(result.id) const doc = await this.plugin.documentsRepository.getDocument(result.id)
if (!doc) { if (!doc) {
console.warn( console.warn(`Locator - Note "${result.id}" not in the live cache`)
`Locator - Note "${result.id}" not in the live cache`
)
countError(true) countError(true)
} }
return doc return doc
@ -349,12 +347,7 @@ export class SearchEngine {
const title = document?.path.toLowerCase() ?? '' const title = document?.path.toLowerCase() ?? ''
const content = (document?.cleanedContent ?? '').toLowerCase() const content = (document?.cleanedContent ?? '').toLowerCase()
return exactTerms.every( return exactTerms.every(
q => q => content.includes(q) || removeDiacritics(title).includes(q)
content.includes(q) ||
removeDiacritics(
title,
this.plugin.settings.ignoreArabicDiacritics
).includes(q)
) )
}) })
} }
@ -524,11 +517,7 @@ export class SearchEngine {
} }
return (doc as any)[fieldName] return (doc as any)[fieldName]
}, },
processTerm: (term: string) => processTerm: (term: string) => removeDiacritics(term).toLowerCase(),
(this.plugin.settings.ignoreDiacritics
? removeDiacritics(term, this.plugin.settings.ignoreArabicDiacritics)
: term
).toLowerCase(),
idField: 'path', idField: 'path',
fields: [ fields: [
'basename', 'basename',

View File

@ -100,8 +100,6 @@ export function getDefaultSettings(app: App): LocatorSettings {
hideExcluded: false, hideExcluded: false,
recencyBoost: RecencyCutoff.Disabled, recencyBoost: RecencyCutoff.Disabled,
downrankedFoldersFilters: [] as string[], downrankedFoldersFilters: [] as string[],
ignoreDiacritics: true,
ignoreArabicDiacritics: false,
indexedFileTypes: [] as string[], indexedFileTypes: [] as string[],
displayTitle: '', displayTitle: '',
PDFIndexing: false, PDFIndexing: false,

View File

@ -15,33 +15,6 @@ export function injectSettingsDanger(
new Setting(containerEl).setName('Danger Zone').setHeading() new Setting(containerEl).setName('Danger Zone').setHeading()
// Ignore diacritics
new Setting(containerEl)
.setName('Ignore diacritics')
.setDesc(
htmlDescription(`Normalize diacritics in search terms. Words like "brûlée" or "žluťoučký" will be indexed as "brulee" and "zlutoucky".<br/>
<span style="color: var(--text-accent)">You probably should <strong>NOT</strong> disable this.</span><br>
<span style="color: var(--text-accent)">Changing this setting will clear the cache.</span><br>
${needsARestart}`)
)
.addToggle(toggle =>
toggle.setValue(settings.ignoreDiacritics).onChange(async v => {
await database.clearCache()
settings.ignoreDiacritics = v
await saveSettings(plugin)
})
)
new Setting(containerEl)
.setName('Ignore Arabic diacritics (beta)')
.addToggle(toggle =>
toggle.setValue(settings.ignoreArabicDiacritics).onChange(async v => {
await database.clearCache()
settings.ignoreArabicDiacritics = v
await saveSettings(plugin)
})
)
// Disable Locator // Disable Locator
const disableDesc = new DocumentFragment() const disableDesc = new DocumentFragment()
disableDesc.createSpan({}, span => { disableDesc.createSpan({}, span => {

View File

@ -41,9 +41,6 @@ export interface LocatorSettings extends WeightingSettings {
recencyBoost: RecencyCutoff recencyBoost: RecencyCutoff
/** downrank files in the given folders */ /** downrank files in the given folders */
downrankedFoldersFilters: string[] downrankedFoldersFilters: string[]
/** Ignore diacritics when indexing files */
ignoreDiacritics: boolean
ignoreArabicDiacritics: boolean
/** Extensions of plain text files to index, in addition to .md */ /** Extensions of plain text files to index, in addition to .md */
indexedFileTypes: string[] indexedFileTypes: string[]

View File

@ -87,8 +87,8 @@ export function getApi(plugin: LocatorPlugin) {
return { return {
async search(q: string): Promise<ResultNoteApi[]> { async search(q: string): Promise<ResultNoteApi[]> {
const query = new Query(q, { const query = new Query(q, {
ignoreDiacritics: plugin.settings.ignoreDiacritics, ignoreDiacritics: true,
ignoreArabicDiacritics: plugin.settings.ignoreArabicDiacritics, ignoreArabicDiacritics: true,
}) })
const raw = await plugin.searchEngine.getSuggestions(query) const raw = await plugin.searchEngine.getSuggestions(query)
return mapResults(plugin, raw) return mapResults(plugin, raw)

View File

@ -68,9 +68,7 @@ export class TextProcessor {
const reg = this.stringsToRegex(words) const reg = this.stringsToRegex(words)
const originalText = text const originalText = text
// text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ') // text = text.toLowerCase().replace(new RegExp(SEPARATORS, 'gu'), ' ')
if (this.plugin.settings.ignoreDiacritics) { text = removeDiacritics(text)
text = removeDiacritics(text, this.plugin.settings.ignoreArabicDiacritics)
}
const startTime = new Date().getTime() const startTime = new Date().getTime()
let match: RegExpExecArray | null = null let match: RegExpExecArray | null = null
let matches: SearchMatch[] = [] let matches: SearchMatch[] = []

View File

@ -115,7 +115,7 @@ const diacriticsRegex = new RegExp(`(?!${regexpExclude})\\p{Diacritic}`, 'gu')
/** /**
* https://stackoverflow.com/a/37511463 * https://stackoverflow.com/a/37511463
*/ */
export function removeDiacritics(str: string, arabic = false): string { export function removeDiacritics(str: string, arabic = true): string {
if (str === null || str === undefined) { if (str === null || str === undefined) {
return '' return ''
} }