Commit 5c4a45bf authored by Jerome Lelong

Parse glossary bib files to populate intellisense

parent 31614f32
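For context: with the glossaries-extra package, glossary definitions live in `.bib` files that the document loads with a line such as `\GlsXtrLoadResources[src={glossary.bib}]`. A minimal sketch of such a file, with hypothetical names (the parser added below treats `@entry` items as glossary entries and every other entry type as an acronym):

    @entry{tree,
        name={tree},
        description={a connected acyclic graph}
    }
    @abbreviation{dag,
        short={DAG},
        long={directed acyclic graph}
    }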
@@ -44,7 +44,10 @@ export const bibTools = {
     parseAbbrevations
 }
-function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue): string {
+function expandField(abbreviations: {[key: string]: string}, value: bibtexParser.FieldValue | undefined): string {
+    if (value === undefined) {
+        return ''
+    }
     if (value.kind === 'concat') {
         const args = value.content as bibtexParser.FieldValue[]
         return args.map(arg => expandField(abbreviations, arg)).join(' ')
......
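The widened `expandField` signature lets call sites pass a possibly missing field straight through instead of guarding themselves. A hedged usage sketch, mirroring how parseBibFile below calls it:

    // Returns '' when the entry has no `name` field:
    const name = bibTools.expandField(abbreviations, entry.content.find(field => field.name === 'name')?.value)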
 import * as vscode from 'vscode'
 import type * as Ast from '@unified-latex/unified-latex-types'
+import { bibtexParser } from 'latex-utensils'
 import { lw } from '../../lw'
 import { GlossaryType } from '../../types'
 import type { CompletionProvider, FileCache, GlossaryItem } from '../../types'
 import { argContentToStr } from '../../utils/parser'
 import { getLongestBalancedString } from '../../utils/utils'
+import { bibTools } from './citation'
+const logger = lw.log('Intelli', 'Glossary')
 export const provider: CompletionProvider = { from }
 export const glossary = {
     parse,
@@ -13,17 +16,19 @@ export const glossary = {
 }
 const data = {
     // The keys are the labels of the glossary items.
     glossaries: new Map<string, GlossaryItem>(),
-    acronyms: new Map<string, GlossaryItem>()
+    acronyms: new Map<string, GlossaryItem>(),
+    // The keys are the paths of the `.bib` files.
+    bibEntries: new Map<string, GlossaryItem[]>(),
 }
 interface GlossaryEntry {
     label: string | undefined,
     description: string | undefined
 }
+lw.watcher.bib.onCreate(uri => parseBibFile(uri.fsPath))
+lw.watcher.bib.onChange(uri => parseBibFile(uri.fsPath))
+lw.watcher.bib.onDelete(uri => removeEntriesInFile(uri.fsPath))
 function from(result: RegExpMatchArray): vscode.CompletionItem[] {
-    updateAll()
+    updateAll(getIncludedBibs(lw.root.file.path))
     let suggestions: Map<string, GlossaryItem>
     if (result[1] && result[1].match(/^ac/i)) {
@@ -38,14 +43,58 @@ function from(result: RegExpMatchArray): vscode.CompletionItem[] {
 }
 function getItem(token: string): GlossaryItem | undefined {
-    updateAll()
+    updateAll(getIncludedBibs(lw.root.file.path))
     return data.glossaries.get(token) || data.acronyms.get(token)
 }
-function updateAll() {
+/**
+ * Returns the array of the paths of glossary `.bib` files referenced from `file`.
+ *
+ * @param file The path of a LaTeX file.
+ * @param visitedTeX Internal use only.
+ */
+function getIncludedBibs(file?: string, visitedTeX: string[] = []): string[] {
+    if (file === undefined) {
+        return []
+    }
+    const cache = lw.cache.get(file)
+    if (cache === undefined) {
+        return []
+    }
+    let bibs = Array.from(cache.glossarybibfiles)
+    visitedTeX.push(file)
+    for (const child of cache.children) {
+        if (visitedTeX.includes(child.filePath)) {
+            // Already included
+            continue
+        }
+        bibs = Array.from(new Set(bibs.concat(getIncludedBibs(child.filePath, visitedTeX))))
+    }
+    return bibs
+}
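A hedged usage sketch with hypothetical paths: for a root file that \input's a chapter, the traversal returns the union of both files' glossary bibs, and `visitedTeX` guarantees termination on circular \input graphs:

    // getIncludedBibs('/proj/main.tex')
    // -> ['/proj/glossary.bib', '/proj/chapters/acronyms.bib']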
+/**
+ * Aggregates glossary entries from `.bib` files and from glossary items defined in LaTeX files included in the root file.
+ *
+ * @param bibFiles The array of the paths of the glossary `.bib` files to aggregate.
+ */
+function updateAll(bibFiles: string[]) {
     // Extract cached references
     const glossaryList: string[] = []
+    // From bib files
+    bibFiles.forEach(file => {
+        const entries = data.bibEntries.get(file)
+        entries?.forEach(entry => {
+            if (entry.type === GlossaryType.glossary) {
+                data.glossaries.set(entry.label, entry)
+            } else {
+                data.acronyms.set(entry.label, entry)
+            }
+            glossaryList.push(entry.label)
+        })
+    })
     lw.cache.getIncludedTeX().forEach(cachedFile => {
         const cachedGlossaries = lw.cache.get(cachedFile)?.elements.glossary
         if (cachedGlossaries === undefined) {
@@ -61,7 +110,7 @@ function updateAll() {
         })
     })
-    // Remove references that has been deleted
+    // Remove references that have been deleted
     data.glossaries.forEach((_, key) => {
         if (!glossaryList.includes(key)) {
             data.glossaries.delete(key)
@@ -74,6 +123,64 @@ function updateAll() {
     })
 }
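So updateAll rebuilds the completion maps on every call: entries from the given `.bib` files and from the included TeX files are (re)inserted, and the final pruning pass drops every label missing from the freshly collected `glossaryList`, which is how deletions in a watched `.bib` file reach IntelliSense. A hedged sketch with a hypothetical path:

    // updateAll(['/proj/glossary.bib'])   // inserts the file's labels
    // data.bibEntries.delete('/proj/glossary.bib')
    // updateAll([])                       // the stale labels are pruned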
+/**
+ * Parses a glossary `.bib` file. The results are stored in `data.bibEntries`.
+ *
+ * @param fileName The path of the `.bib` file.
+ */
+async function parseBibFile(fileName: string) {
+    logger.log(`Parsing glossary .bib entries from ${fileName}`)
+    const configuration = vscode.workspace.getConfiguration('latex-workshop', vscode.Uri.file(fileName))
+    if ((await lw.external.stat(vscode.Uri.file(fileName))).size >= (configuration.get('bibtex.maxFileSize') as number) * 1024 * 1024) {
+        logger.log(`Bib file is too large, ignoring it: ${fileName}`)
+        data.bibEntries.delete(fileName)
+        return
+    }
+    const newEntry: GlossaryItem[] = []
+    const bibtex = await lw.file.read(fileName)
+    logger.log(`Parse BibTeX AST from ${fileName} .`)
+    const ast = await lw.parser.parse.bib(vscode.Uri.file(fileName), bibtex ?? '')
+    if (ast === undefined) {
+        logger.log(`Parsed 0 bib entries from ${fileName}.`)
+        lw.event.fire(lw.event.FileParsed, fileName)
+        return
+    }
+    const abbreviations = bibTools.parseAbbrevations(ast)
+    ast.content
+        .filter(bibtexParser.isEntry)
+        .forEach((entry: bibtexParser.Entry) => {
+            if (entry.internalKey === undefined) {
+                return
+            }
+            let type: GlossaryType
+            if (['entry'].includes(entry.entryType)) {
+                type = GlossaryType.glossary
+            } else {
+                type = GlossaryType.acronym
+            }
+            const name = bibTools.expandField(abbreviations, entry.content.find(field => field.name === 'name')?.value)
+            const description = bibTools.expandField(abbreviations, entry.content.find(field => field.name === 'description')?.value)
+            const item: GlossaryItem = {
+                type,
+                label: entry.internalKey,
+                filePath: fileName,
+                position: new vscode.Position(entry.location.start.line - 1, entry.location.start.column - 1),
+                kind: vscode.CompletionItemKind.Reference,
+                detail: name + ': ' + description
+            }
+            newEntry.push(item)
+        })
+    data.bibEntries.set(fileName, newEntry)
+    logger.log(`Parsed ${newEntry.length} glossary bib entries from ${fileName} .`)
+    void lw.outline.reconstruct()
+    lw.event.fire(lw.event.FileParsed, fileName)
+}
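A hedged sketch of the completion item produced for the `@entry{tree, ...}` sample above (the position is illustrative):

    // {
    //     type: GlossaryType.glossary,
    //     label: 'tree',
    //     filePath: '/proj/glossary.bib',
    //     position: new vscode.Position(0, 0),
    //     kind: vscode.CompletionItemKind.Reference,
    //     detail: 'tree: a connected acyclic graph'
    // }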
+function removeEntriesInFile(file: string) {
+    logger.log(`Remove parsed bib entries for ${file}`)
+    data.bibEntries.delete(file)
+}
 function parse(cache: FileCache) {
     if (cache.ast !== undefined) {
         cache.elements.glossary = parseAst(cache.ast, cache.filePath)
@@ -84,12 +191,13 @@ function parse(cache: FileCache) {
 function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] {
     let glos: GlossaryItem[] = []
-    let entry: GlossaryEntry = { label: '', description: '' }
+    let label: string = ''
+    let description: string = ''
     let type: GlossaryType | undefined
     if (node.type === 'macro' && ['newglossaryentry', 'provideglossaryentry'].includes(node.content)) {
         type = GlossaryType.glossary
-        let description = argContentToStr(node.args?.[1]?.content || [], true)
+        description = argContentToStr(node.args?.[1]?.content || [], true)
         const index = description.indexOf('description=')
         if (index >= 0) {
             description = description.slice(index + 12)
@@ -101,28 +209,23 @@ function parseAst(node: Ast.Node, filePath: string): GlossaryItem[] {
         } else {
             description = ''
         }
-        entry = {
-            label: argContentToStr(node.args?.[0]?.content || []),
-            description
-        }
+        label = argContentToStr(node.args?.[0]?.content || [])
     } else if (node.type === 'macro' && ['longnewglossaryentry', 'longprovideglossaryentry', 'newacronym', 'newabbreviation', 'newabbr'].includes(node.content)) {
         if (['longnewglossaryentry', 'longprovideglossaryentry'].includes(node.content)) {
             type = GlossaryType.glossary
         } else {
             type = GlossaryType.acronym
         }
-        entry = {
-            label: argContentToStr(node.args?.[1]?.content || []),
-            description: argContentToStr(node.args?.[3]?.content || []),
-        }
+        label = argContentToStr(node.args?.[1]?.content || [])
+        description = argContentToStr(node.args?.[3]?.content || [])
     }
-    if (type !== undefined && entry.label && entry.description && node.position !== undefined) {
+    if (type !== undefined && label && description && node.position !== undefined) {
         glos.push({
             type,
             filePath,
             position: new vscode.Position(node.position.start.line - 1, node.position.start.column - 1),
-            label: entry.label,
-            detail: entry.description,
+            label,
+            detail: description,
             kind: vscode.CompletionItemKind.Reference
         })
     }
......
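For reference, hedged examples of the macros parseAst recognizes (hypothetical content):

    \newglossaryentry{tree}{name={tree}, description={a connected acyclic graph}}
    \newacronym{dag}{DAG}{directed acyclic graph}

For `\newglossaryentry`, args[0] is the label and the description is sliced out of the key=value list in args[1]; for `\newacronym`, args[1] is the label and args[3] the long form, presumably because the leading optional argument occupies args[0].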
@@ -250,6 +250,7 @@ async function refreshCache(filePath: string, rootPath?: string): Promise<Promis
         elements: {},
         children: [],
         bibfiles: new Set(),
+        glossarybibfiles: new Set(),
         external: {}}
     caches.set(filePath, fileCache)
     rootPath = rootPath || lw.root.file.path
@@ -474,6 +475,7 @@ async function updateElements(fileCache: FileCache): Promise<void> {
     lw.completion.subsuperscript.parse(fileCache)
     lw.completion.input.parseGraphicsPath(fileCache)
     await updateBibfiles(fileCache)
+    await updateGlossaryBibFiles(fileCache)
     const elapsed = performance.now() - start
     logger.log(`Updated elements in ${elapsed.toFixed(2)} ms: ${fileCache.filePath} .`)
 }
@@ -516,6 +518,41 @@ async function updateBibfiles(fileCache: FileCache) {
     }
 }
+/**
+ * Updates the glossary `.bib` files associated with a given file cache.
+ *
+ * This function scans the cached file content for `\GlsXtrLoadResources`
+ * macros using a regular expression, extracts the file paths given in their
+ * `src` keys, resolves them to full paths, and adds them to the set of
+ * glossary `.bib` files in the file cache. Each file that is not excluded is
+ * logged, added to the cache, and watched for changes.
+ *
+ * @param {FileCache} fileCache - The file cache object to update with
+ * glossary bibliography files.
+ */
+async function updateGlossaryBibFiles(fileCache: FileCache) {
+    const glossaryReg = /\\GlsXtrLoadResources\s*\[.*?src=\{([^}]+)\}.*?\]/gs
+    let result: RegExpExecArray | null
+    while ((result = glossaryReg.exec(fileCache.contentTrimmed)) !== null) {
+        // The regular expression has a single capture group, so result[1]
+        // holds the comma-separated list inside src={...}.
+        const bibs = result[1].split(',').map(bib => bib.trim())
+        for (const bib of bibs) {
+            const bibPath = await utils.resolveFile([path.dirname(fileCache.filePath)], bib, '.bib')
+            if (!bibPath || isExcluded(bibPath)) {
+                continue
+            }
+            fileCache.glossarybibfiles.add(bibPath)
+            logger.log(`Found glossary bib ${bibPath} from ${fileCache.filePath} .`)
+            const bibUri = vscode.Uri.file(bibPath)
+            if (!lw.watcher.bib.has(bibUri)) {
+                lw.watcher.bib.add(bibUri)
+            }
+        }
+    }
+}
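A hedged sketch of what the regular expression captures, on hypothetical input:

    // const tex = '\\GlsXtrLoadResources[selection=all, src={glossary,acronyms}]'
    // glossaryReg.exec(tex)?.[1] === 'glossary,acronyms'
    // -> resolved relative to the including file as glossary.bib and acronyms.bib

Note that the pattern requires the braced form `src={...}`; a bare `src=glossary.bib` would not match.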
/**
* Loads and processes a .fls file related to a specified file path.
*
......
@@ -11,7 +11,7 @@ const logger = lw.log('Structure', 'BibTeX')
  * Convert a bibtexParser.FieldValue to a string
  * @param field the bibtexParser.FieldValue to parse
  */
-function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string {
+export function fieldValueToString(field: bibtexParser.FieldValue, abbreviations: {[abbr: string]: string}): string {
     if (field.kind === 'concat') {
         return field.content.map(value => fieldValueToString(value, abbreviations)).reduce((acc, cur) => {return acc + ' # ' + cur})
     } else if (field.kind === 'abbreviation') {
......
@@ -37,6 +37,8 @@ export type FileCache = {
     }[],
     /** The array of the paths of `.bib` files referenced from the LaTeX file */
     bibfiles: Set<string>,
+    /** The set of the paths of glossary `.bib` files loaded via `\GlsXtrLoadResources` to provide glossary entries */
+    glossarybibfiles: Set<string>,
     /** A dictionary of external documents provided by `\externaldocument` of
      * `xr` package. The value is its prefix `\externaldocument[prefix]{*}` */
     external: {[filePath: string]: string},
......