Removed url tokenization
This commit is contained in:
		
							parent
							
								
									080805f714
								
							
						
					
					
						commit
						5d50f9cb82
					
				| 
						 | 
					@ -1,7 +1,7 @@
 | 
				
			||||||
import type { QueryCombination } from 'minisearch'
 | 
					import type { QueryCombination } from 'minisearch'
 | 
				
			||||||
import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
 | 
					import { BRACKETS_AND_SPACE, chsRegex, SPACE_OR_PUNCTUATION } from '../globals'
 | 
				
			||||||
import { logVerbose, splitCamelCase, splitHyphens } from '../tools/utils'
 | 
					 | 
				
			||||||
import type LocatorPlugin from '../main'
 | 
					import type LocatorPlugin from '../main'
 | 
				
			||||||
 | 
					import { splitCamelCase, splitHyphens } from '../tools/utils'
 | 
				
			||||||
 | 
					
 | 
				
			||||||
const markdownLinkExtractor = require('markdown-link-extractor')
 | 
					const markdownLinkExtractor = require('markdown-link-extractor')
 | 
				
			||||||
 | 
					
 | 
				
			||||||
| 
						 | 
					@ -17,26 +17,16 @@ export class Tokenizer {
 | 
				
			||||||
  public tokenizeForIndexing(text: string): string[] {
 | 
					  public tokenizeForIndexing(text: string): string[] {
 | 
				
			||||||
    try {
 | 
					    try {
 | 
				
			||||||
      const words = this.tokenizeWords(text)
 | 
					      const words = this.tokenizeWords(text)
 | 
				
			||||||
      let urls: string[] = []
 | 
					 | 
				
			||||||
      if (this.plugin.settings.tokenizeUrls) {
 | 
					 | 
				
			||||||
        try {
 | 
					 | 
				
			||||||
          urls = markdownLinkExtractor(text)
 | 
					 | 
				
			||||||
        } catch (e) {
 | 
					 | 
				
			||||||
          logVerbose('Error extracting urls', e)
 | 
					 | 
				
			||||||
        }
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      let tokens = this.tokenizeTokens(text, { skipChs: true })
 | 
					      let tokens = this.tokenizeTokens(text, { skipChs: true })
 | 
				
			||||||
      tokens = [...tokens.flatMap(token => [
 | 
					      tokens = [
 | 
				
			||||||
        token,
 | 
					        ...tokens.flatMap(token => [
 | 
				
			||||||
        ...splitHyphens(token),
 | 
					          token,
 | 
				
			||||||
        ...splitCamelCase(token),
 | 
					          ...splitHyphens(token),
 | 
				
			||||||
      ]), ...words]
 | 
					          ...splitCamelCase(token),
 | 
				
			||||||
 | 
					        ]),
 | 
				
			||||||
      // Add urls
 | 
					        ...words,
 | 
				
			||||||
      if (urls.length) {
 | 
					      ]
 | 
				
			||||||
        tokens = [...tokens, ...urls]
 | 
					 | 
				
			||||||
      }
 | 
					 | 
				
			||||||
 | 
					
 | 
				
			||||||
      // Remove duplicates
 | 
					      // Remove duplicates
 | 
				
			||||||
      tokens = [...new Set(tokens)]
 | 
					      tokens = [...new Set(tokens)]
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -107,7 +107,6 @@ export function getDefaultSettings(app: App): LocatorSettings {
 | 
				
			||||||
    showCreateButton: false,
 | 
					    showCreateButton: false,
 | 
				
			||||||
    showPreviousQueryResults: true,
 | 
					    showPreviousQueryResults: true,
 | 
				
			||||||
    simpleSearch: false,
 | 
					    simpleSearch: false,
 | 
				
			||||||
    tokenizeUrls: false,
 | 
					 | 
				
			||||||
    fuzziness: '1',
 | 
					    fuzziness: '1',
 | 
				
			||||||
 | 
					
 | 
				
			||||||
    weightBasename: 10,
 | 
					    weightBasename: 10,
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -100,23 +100,6 @@ export function injectSettingsBehavior(
 | 
				
			||||||
      })
 | 
					      })
 | 
				
			||||||
    )
 | 
					    )
 | 
				
			||||||
 | 
					
 | 
				
			||||||
  // Extract URLs
 | 
					 | 
				
			||||||
  // Crashes on iOS
 | 
					 | 
				
			||||||
  if (!Platform.isIosApp) {
 | 
					 | 
				
			||||||
    new Setting(containerEl)
 | 
					 | 
				
			||||||
      .setName('Tokenize URLs')
 | 
					 | 
				
			||||||
      .setDesc(
 | 
					 | 
				
			||||||
        `Enable this if you want to be able to search for URLs as separate words.
 | 
					 | 
				
			||||||
        This setting has a strong impact on indexing performance, and can crash Obsidian under certain conditions.`
 | 
					 | 
				
			||||||
      )
 | 
					 | 
				
			||||||
      .addToggle(toggle =>
 | 
					 | 
				
			||||||
        toggle.setValue(settings.tokenizeUrls).onChange(async v => {
 | 
					 | 
				
			||||||
          settings.tokenizeUrls = v
 | 
					 | 
				
			||||||
          await saveSettings(plugin)
 | 
					 | 
				
			||||||
        })
 | 
					 | 
				
			||||||
      )
 | 
					 | 
				
			||||||
  }
 | 
					 | 
				
			||||||
 | 
					 | 
				
			||||||
  // Open in new pane
 | 
					  // Open in new pane
 | 
				
			||||||
  new Setting(containerEl)
 | 
					  new Setting(containerEl)
 | 
				
			||||||
    .setName('Open in new pane')
 | 
					    .setName('Open in new pane')
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
| 
						 | 
					@ -70,7 +70,6 @@ export interface LocatorSettings extends WeightingSettings {
 | 
				
			||||||
  welcomeMessage: string
 | 
					  welcomeMessage: string
 | 
				
			||||||
  /** If a query returns 0 result, try again with more relax conditions */
 | 
					  /** If a query returns 0 result, try again with more relax conditions */
 | 
				
			||||||
  simpleSearch: boolean
 | 
					  simpleSearch: boolean
 | 
				
			||||||
  tokenizeUrls: boolean
 | 
					 | 
				
			||||||
  splitCamelCase: boolean
 | 
					  splitCamelCase: boolean
 | 
				
			||||||
  openInNewPane: boolean
 | 
					  openInNewPane: boolean
 | 
				
			||||||
  verboseLogging: boolean
 | 
					  verboseLogging: boolean
 | 
				
			||||||
| 
						 | 
					
 | 
				
			||||||
		Loading…
	
		Reference in New Issue
	
	Block a user