-
-
Notifications
You must be signed in to change notification settings - Fork 473
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Merge pull request #336 from huy-trn/good_for_rag
Code compression
- Loading branch information
Showing
33 changed files
with
1,076 additions
and
6 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.
Oops, something went wrong.
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,23 @@ | ||
/**
 * Maps a lower-case file extension (without the leading dot) to the name of
 * the tree-sitter grammar used to parse files with that extension.
 *
 * The ES-module / CommonJS specific extensions (`mjs`, `mts`, `cts`) are
 * listed next to `cjs` so that they resolve to the same grammar as their
 * plain `js` / `ts` counterparts.
 *
 * NOTE(review): `vue` and `tsx` are mapped to the plain `javascript` /
 * `typescript` grammars — confirm those grammars cope with the template / JSX
 * syntax found in such files.
 *
 * @see https://unpkg.com/browse/tree-sitter-wasms@latest/out/
 */
export const ext2Lang = {
  vue: 'javascript',
  cjs: 'javascript',
  mjs: 'javascript',
  js: 'javascript',
  jsx: 'javascript',
  ts: 'typescript',
  mts: 'typescript',
  cts: 'typescript',
  tsx: 'typescript',
  h: 'c',
  c: 'c',
  hpp: 'cpp',
  cpp: 'cpp',
  py: 'python',
  rs: 'rust',
  java: 'java',
  go: 'go',
  cs: 'c_sharp',
  rb: 'ruby',
  php: 'php',
  swift: 'swift',
};
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,29 @@ | ||
import { queryC } from './queries/c.js'; | ||
import { queryCSharp } from './queries/cSharp.js'; | ||
import { queryCpp } from './queries/cpp.js'; | ||
import { queryGo } from './queries/go.js'; | ||
import { queryJava } from './queries/java.js'; | ||
import { queryJavascript } from './queries/javascript.js'; | ||
import { queryPhp } from './queries/php.js'; | ||
import { queryPython } from './queries/python.js'; | ||
import { queryRuby } from './queries/ruby.js'; | ||
import { queryRust } from './queries/rust.js'; | ||
import { querySwift } from './queries/swift.js'; | ||
import { queryTypescript } from './queries/typescript.js'; | ||
|
||
/**
 * Tree-sitter query source for every supported language, keyed by the
 * grammar name that is also used to load the matching parser.
 */
export const lang2Query = {
  // JavaScript family
  javascript: queryJavascript,
  typescript: queryTypescript,

  // C family
  c: queryC,
  cpp: queryCpp,

  // Everything else
  python: queryPython,
  rust: queryRust,
  go: queryGo,
  c_sharp: queryCSharp,
  ruby: queryRuby,
  java: queryJava,
  php: queryPhp,
  swift: querySwift,
};

/** Union of the language names that have an entry in {@link lang2Query}. */
export type SupportedLang = keyof typeof lang2Query;
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,67 @@ | ||
import * as path from 'node:path'; | ||
import Parser from 'web-tree-sitter'; | ||
|
||
import { RepomixError } from '../../shared/errorHandle.js'; | ||
import { ext2Lang } from './ext2Lang.js'; | ||
import { type SupportedLang, lang2Query } from './lang2Query.js'; | ||
import { loadLanguage } from './loadLanguage.js'; | ||
|
||
/**
 * Lazily creates tree-sitter parsers and queries and caches them per
 * language name, so each language is prepared on first request.
 *
 * NOTE(review): `init()` wraps `Parser.init()`; presumably it has to be
 * awaited before any parser is requested — confirm against web-tree-sitter.
 */
export class LanguageParser {
  /** Parsers that have already been created, keyed by language name. */
  private loadedParsers: Record<string, Parser> = {};

  /** Compiled queries that have already been created, keyed by language name. */
  private loadedQueries: Record<string, Parser.Query> = {};

  /** Returns the lower-cased extension of `filePath` without the leading dot. */
  private getFileExtension(filePath: string) {
    const dottedExtension = path.extname(filePath);
    return dottedExtension.toLowerCase().slice(1);
  }

  /**
   * Loads the grammar for `name`, then stores a parser and a compiled query
   * for it in the caches.
   *
   * @throws RepomixError when loading the grammar, creating the parser or
   *   compiling the query fails.
   */
  private async prepareLang(name: SupportedLang) {
    try {
      const language = await loadLanguage(name);

      const langParser = new Parser();
      langParser.setLanguage(language);
      this.loadedParsers[name] = langParser;

      // The query is compiled only after the parser has been cached.
      const compiledQuery = language.query(lang2Query[name]);
      this.loadedQueries[name] = compiledQuery;
    } catch (error) {
      let reason: string;
      if (error instanceof Error) {
        reason = error.message;
      } else {
        reason = String(error);
      }
      throw new RepomixError(`Failed to prepare language ${name}: ${reason}`);
    }
  }

  /**
   * Returns the cached parser for a language, preparing it on first use.
   *
   * @param name - Name of the language.
   */
  public async getParserForLang(name: SupportedLang) {
    const cachedParser = this.loadedParsers[name];
    if (cachedParser) {
      return cachedParser;
    }
    await this.prepareLang(name);
    return this.loadedParsers[name];
  }

  /**
   * Returns the cached query for a language, preparing it on first use.
   *
   * @param name - Name of the language.
   */
  public async getQueryForLang(name: SupportedLang) {
    const cachedQuery = this.loadedQueries[name];
    if (cachedQuery) {
      return cachedQuery;
    }
    await this.prepareLang(name);
    return this.loadedQueries[name];
  }

  /**
   * Derives the language from the extension of `filePath`.
   *
   * @returns The language name, or `undefined` when the extension is not
   *   listed in `ext2Lang`.
   */
  public guessTheLang(filePath: string): SupportedLang | undefined {
    const extension = this.getFileExtension(filePath);
    const isKnownExtension = Object.keys(ext2Lang).some((known) => known === extension);
    return isKnownExtension ? (ext2Lang[extension as keyof typeof ext2Lang] as SupportedLang) : undefined;
  }

  /**
   * Initializes the underlying tree-sitter runtime via `Parser.init()`.
   *
   * @throws Error when the initialization fails.
   */
  public async init() {
    try {
      await Parser.init();
    } catch (error) {
      const reason = error instanceof Error ? error.message : String(error);
      throw new Error(`Failed to initialize parser: ${reason}`);
    }
  }
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,30 @@ | ||
import fs from 'node:fs/promises'; | ||
import { createRequire } from 'node:module'; | ||
import path from 'node:path'; | ||
import Parser from 'web-tree-sitter'; | ||
|
||
const require = createRequire(import.meta.url); | ||
|
||
/**
 * Loads the tree-sitter grammar for the given language name.
 *
 * @param langName - Grammar name; used to locate the matching WASM file.
 * @returns The language object produced by `Parser.Language.load`.
 * @throws Error `Invalid language name` when `langName` is empty, or
 *   `Failed to load language …` when locating or loading the grammar fails.
 */
export async function loadLanguage(langName: string): Promise<Parser.Language> {
  if (!langName) {
    throw new Error('Invalid language name');
  }

  try {
    const grammarFile = await getWasmPath(langName);
    const language = await Parser.Language.load(grammarFile);
    return language;
  } catch (error: unknown) {
    let reason: string;
    if (error instanceof Error) {
      reason = error.message;
    } else {
      reason = String(error);
    }
    throw new Error(`Failed to load language ${langName}: ${reason}`);
  }
}
|
||
/**
 * Resolves the path of the tree-sitter grammar file for `langName` inside
 * the `tree-sitter-wasms` package and verifies that it is accessible.
 *
 * @param langName - Grammar name; the file looked up is
 *   `tree-sitter-wasms/out/tree-sitter-<langName>.wasm`.
 * @returns The path produced by `require.resolve` for that file.
 * @throws Error `WASM file not found for language …` when the file cannot be
 *   resolved or accessed.
 */
async function getWasmPath(langName: string): Promise<string> {
  const specifier = `tree-sitter-wasms/out/tree-sitter-${langName}.wasm`;
  // Reported in the error message; stays the bare specifier when resolution
  // itself fails and no concrete path is known.
  let wasmPath = specifier;
  try {
    // require.resolve throws when the file is missing, so it has to run
    // inside the try block for the descriptive error below to be produced.
    wasmPath = require.resolve(specifier);
    await fs.access(wasmPath);
    return wasmPath;
  } catch {
    throw new Error(`WASM file not found for language ${langName}: ${wasmPath}`);
  }
}
Oops, something went wrong.