Removed URL tokenization

This commit is contained in:
Simon Cambier 2025-06-21 13:40:11 +02:00
parent 080805f714
commit 5d50f9cb82
4 changed files with 9 additions and 38 deletions

View File

@ -1,7 +1,7 @@
import type { QueryCombination } from 'minisearch' import type { QueryCombination } from 'minisearch'
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals' import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
import type LocatorPlugin from '../main' import type LocatorPlugin from '../main'
import { splitCamelCase, splitHyphens } from '../tools/utils'
const markdownLinkExtractor = require('markdown-link-extractor') const markdownLinkExtractor = require('markdown-link-extractor')
@ -17,26 +17,16 @@ export class Tokenizer {
public tokenizeForIndexing(text: string): string[] { public tokenizeForIndexing(text: string): string[] {
try { try {
const words = this.tokenizeWords(text) const words = this.tokenizeWords(text)
let urls: string[] = []
if (this.plugin.settings.tokenizeUrls) {
try {
urls = markdownLinkExtractor(text)
} catch (e) {
logVerbose('Error extracting urls', e)
}
}
let tokens = this.tokenizeTokens(text, { skipChs: true }) let tokens = this.tokenizeTokens(text, { skipChs: true })
tokens = [...tokens.flatMap(token => [ tokens = [
token, ...tokens.flatMap(token => [
...splitHyphens(token), token,
...splitCamelCase(token), ...splitHyphens(token),
]), ...words] ...splitCamelCase(token),
]),
// Add urls ...words,
if (urls.length) { ]
tokens = [...tokens, ...urls]
}
// Remove duplicates // Remove duplicates
tokens = [...new Set(tokens)] tokens = [...new Set(tokens)]

View File

@ -107,7 +107,6 @@ export function getDefaultSettings(app: App): LocatorSettings {
showCreateButton: false, showCreateButton: false,
showPreviousQueryResults: true, showPreviousQueryResults: true,
simpleSearch: false, simpleSearch: false,
tokenizeUrls: false,
fuzziness: '1', fuzziness: '1',
weightBasename: 10, weightBasename: 10,

View File

@ -100,23 +100,6 @@ export function injectSettingsBehavior(
}) })
) )
// Extract URLs
// Crashes on iOS
if (!Platform.isIosApp) {
new Setting(containerEl)
.setName('Tokenize URLs')
.setDesc(
`Enable this if you want to be able to search for URLs as separate words.
This setting has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
)
.addToggle(toggle =>
toggle.setValue(settings.tokenizeUrls).onChange(async v => {
settings.tokenizeUrls = v
await saveSettings(plugin)
})
)
}
// Open in new pane // Open in new pane
new Setting(containerEl) new Setting(containerEl)
.setName('Open in new pane') .setName('Open in new pane')

View File

@ -70,7 +70,6 @@ export interface LocatorSettings extends WeightingSettings {
welcomeMessage: string welcomeMessage: string
/** If a query returns 0 results, try again with more relaxed conditions */ /** If a query returns 0 results, try again with more relaxed conditions */
simpleSearch: boolean simpleSearch: boolean
tokenizeUrls: boolean
splitCamelCase: boolean splitCamelCase: boolean
openInNewPane: boolean openInNewPane: boolean
verboseLogging: boolean verboseLogging: boolean