Removed URL tokenization
This commit is contained in:
parent
080805f714
commit
5d50f9cb82
|
@ -1,7 +1,7 @@
|
||||||
import type { QueryCombination } from 'minisearch'
|
import type { QueryCombination } from 'minisearch'
|
||||||
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
|
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
|
||||||
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
|
|
||||||
import type LocatorPlugin from '../main'
|
import type LocatorPlugin from '../main'
|
||||||
|
import { splitCamelCase, splitHyphens } from '../tools/utils'
|
||||||
|
|
||||||
const markdownLinkExtractor = require('markdown-link-extractor')
|
const markdownLinkExtractor = require('markdown-link-extractor')
|
||||||
|
|
||||||
|
@ -17,26 +17,16 @@ export class Tokenizer {
|
||||||
public tokenizeForIndexing(text: string): string[] {
|
public tokenizeForIndexing(text: string): string[] {
|
||||||
try {
|
try {
|
||||||
const words = this.tokenizeWords(text)
|
const words = this.tokenizeWords(text)
|
||||||
let urls: string[] = []
|
|
||||||
if (this.plugin.settings.tokenizeUrls) {
|
|
||||||
try {
|
|
||||||
urls = markdownLinkExtractor(text)
|
|
||||||
} catch (e) {
|
|
||||||
logVerbose('Error extracting urls', e)
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let tokens = this.tokenizeTokens(text, { skipChs: true })
|
let tokens = this.tokenizeTokens(text, { skipChs: true })
|
||||||
tokens = [...tokens.flatMap(token => [
|
tokens = [
|
||||||
token,
|
...tokens.flatMap(token => [
|
||||||
...splitHyphens(token),
|
token,
|
||||||
...splitCamelCase(token),
|
...splitHyphens(token),
|
||||||
]), ...words]
|
...splitCamelCase(token),
|
||||||
|
]),
|
||||||
// Add urls
|
...words,
|
||||||
if (urls.length) {
|
]
|
||||||
tokens = [...tokens, ...urls]
|
|
||||||
}
|
|
||||||
|
|
||||||
// Remove duplicates
|
// Remove duplicates
|
||||||
tokens = [...new Set(tokens)]
|
tokens = [...new Set(tokens)]
|
||||||
|
|
|
@ -107,7 +107,6 @@ export function getDefaultSettings(app: App): LocatorSettings {
|
||||||
showCreateButton: false,
|
showCreateButton: false,
|
||||||
showPreviousQueryResults: true,
|
showPreviousQueryResults: true,
|
||||||
simpleSearch: false,
|
simpleSearch: false,
|
||||||
tokenizeUrls: false,
|
|
||||||
fuzziness: '1',
|
fuzziness: '1',
|
||||||
|
|
||||||
weightBasename: 10,
|
weightBasename: 10,
|
||||||
|
|
|
@ -100,23 +100,6 @@ export function injectSettingsBehavior(
|
||||||
})
|
})
|
||||||
)
|
)
|
||||||
|
|
||||||
// Extract URLs
|
|
||||||
// Crashes on iOS
|
|
||||||
if (!Platform.isIosApp) {
|
|
||||||
new Setting(containerEl)
|
|
||||||
.setName('Tokenize URLs')
|
|
||||||
.setDesc(
|
|
||||||
`Enable this if you want to be able to search for URLs as separate words.
|
|
||||||
This setting has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
|
|
||||||
)
|
|
||||||
.addToggle(toggle =>
|
|
||||||
toggle.setValue(settings.tokenizeUrls).onChange(async v => {
|
|
||||||
settings.tokenizeUrls = v
|
|
||||||
await saveSettings(plugin)
|
|
||||||
})
|
|
||||||
)
|
|
||||||
}
|
|
||||||
|
|
||||||
// Open in new pane
|
// Open in new pane
|
||||||
new Setting(containerEl)
|
new Setting(containerEl)
|
||||||
.setName('Open in new pane')
|
.setName('Open in new pane')
|
||||||
|
|
|
@ -70,7 +70,6 @@ export interface LocatorSettings extends WeightingSettings {
|
||||||
welcomeMessage: string
|
welcomeMessage: string
|
||||||
/** If a query returns 0 results, try again with more relaxed conditions */
|
/** If a query returns 0 results, try again with more relaxed conditions */
|
||||||
simpleSearch: boolean
|
simpleSearch: boolean
|
||||||
tokenizeUrls: boolean
|
|
||||||
splitCamelCase: boolean
|
splitCamelCase: boolean
|
||||||
openInNewPane: boolean
|
openInNewPane: boolean
|
||||||
verboseLogging: boolean
|
verboseLogging: boolean
|
||||||
|
|
Loading…
Reference in New Issue
Block a user