把项目转换为md知识库,提供给gpts使用
- 创业
- 2025-08-06 06:45:04

"""Convert one or more project directories into a single Markdown knowledge base.

Every file whose name ends with one of ``file_suffixes`` is appended to the
output file under a Markdown heading. Non-Markdown files are wrapped in a
fenced code block tagged with the language looked up in ``language_mapping``;
``.md`` files are copied verbatim. The resulting file is intended to be
uploaded as knowledge for GPTs.
"""
import os

try:
    import chardet
except ImportError:
    # chardet is a third-party package; without it every file is read as UTF-8.
    chardet = None

project_path = ''  # root directory of the project currently being exported
project_name = ''  # base name of that directory, used in the headings

# Files larger than this many bytes (89 KiB) are skipped.
MAX_FILE_SIZE = 91136

# Encoding used when detection is unavailable or inconclusive.
FALLBACK_ENCODING = 'utf-8'

# Maps a file suffix to the language tag of its Markdown code block.
language_mapping = {
    '.java': 'java',
    '.py': 'python',
    '.js': 'javascript',
    '.ts': 'typescript',
    '.html': 'html',
    '.md': 'markdown',
    '.properties': 'properties',
    '.xml': 'xml',
    '.vue': 'vue',
    '.tsx': 'tsx',
    '.yml': 'yaml',
    '.sql': 'sql',
    '.json': 'json',
    '.txt': 'text',
    '.csv': 'csv',
    '.ini': 'ini',
    '.less': 'less',
    '.scss': 'scss',
    '.gradle': 'groovy',
    '.c': 'c',
    '.cpp': 'cpp',
    '.go': 'go',
    '.php': 'php',
    '.rb': 'ruby',
}

# Directory names that are never descended into. Defined at module level so
# the functions below also work when this file is imported, not only when it
# is run as a script.
ignored_directories = [
    'target', '.idea', 'node_modules', 'build', 'dist', 'out', 'logs', 'log',
    'lib', 'js_sdk', 'uni_modules', 'nativeplugins', 'static', 'public',
]

# Only files whose name ends with one of these suffixes are exported.
file_suffixes = [
    '.java', '.properties', '.yml', '.md', '.txt', '.js', '.html', '.vue',
    '.xml', '.json', '.sql', '.csv', '.py', '.tsx', '.ts', '.less', '.scss',
    '.gradle',
]


def detect_encoding(file_path):
    """Return the name of the text encoding to use for reading ``file_path``.

    The whole file is read in binary mode and handed to ``chardet``. When
    ``chardet`` is not installed, or cannot decide (it reports ``None``, for
    example for an empty file), ``FALLBACK_ENCODING`` is returned instead so
    that the caller never ends up with the platform's locale encoding.
    """
    if chardet is None:
        return FALLBACK_ENCODING
    with open(file_path, 'rb') as file:
        raw_data = file.read()
    return chardet.detect(raw_data)['encoding'] or FALLBACK_ENCODING


def write_file_content_to_utf8(file_path, output_file):
    """Append one source file to ``output_file`` as a Markdown section.

    Args:
        file_path: Path of the file to export.
        output_file: Writable text file object receiving the Markdown.

    Files larger than ``MAX_FILE_SIZE`` bytes and files that cannot be decoded
    are reported on stdout and skipped; nothing is written for them. The
    heading uses the module globals ``project_name`` and ``project_path``,
    which ``create_knowledge_base`` sets before walking a project.
    """
    _, ext = os.path.splitext(file_path)

    if os.path.getsize(file_path) > MAX_FILE_SIZE:
        print(f"忽略大文件:{file_path}")
        return

    encoding = detect_encoding(file_path)
    try:
        with open(file_path, 'r', encoding=encoding) as file:
            content = file.read()
    except (UnicodeDecodeError, LookupError):
        # LookupError: the detector named an encoding Python has no codec for.
        print(f"读取失败(编码不支持):{file_path}")
        return

    relative_path = os.path.relpath(file_path, project_path)
    output_file.write(f"## 文件:{project_name}\\{relative_path}\n\n")

    if ext == '.md':
        # Markdown is already Markdown: embed it without a code fence.
        output_file.write(content)
        output_file.write("\n\n")
        return

    language = language_mapping.get(ext, 'text')  # unknown suffixes -> 'text'
    output_file.write(f"```{language}\n")
    output_file.write(content)
    output_file.write("\n```\n\n")


def should_ignore(directory, ignored_directories):
    """Return True if any component of ``directory`` is an ignored name.

    Components are compared for equality, so ``lib`` ignores ``src/lib`` but
    not ``src/library``.
    """
    ignored = set(ignored_directories)
    components = os.path.normpath(directory).split(os.sep)
    return any(component in ignored for component in components)


def process_directory(directory, output_file, ignored_directories):
    """Recursively export every matching file below ``directory``.

    Args:
        directory: Directory to walk.
        output_file: Writable text file object receiving the Markdown.
        ignored_directories: Directory names that must not be descended into.

    Entries are visited in sorted order so repeated runs produce the same
    output.
    """
    # Only the directory's own name is tested. Ancestors inside the project
    # were already vetted by the recursion, and ancestors outside the project
    # (e.g. a project stored under ".../out/") must not exclude it.
    own_name = os.path.basename(os.path.normpath(directory))
    if should_ignore(own_name, ignored_directories):
        return

    suffixes = tuple(file_suffixes)
    for item in sorted(os.listdir(directory)):
        path = os.path.join(directory, item)
        if os.path.isdir(path):
            process_directory(path, output_file, ignored_directories)
        elif path.endswith(suffixes):
            print('path===============', path)
            write_file_content_to_utf8(path, output_file)


def create_knowledge_base(paths, output_path):
    """Write all given projects into one Markdown knowledge-base file.

    Args:
        paths: Iterable of project directories. A single path (``str`` or
            ``os.PathLike``) is accepted as well and treated as one project.
        output_path: Path of the Markdown file to create or overwrite.

    Each project starts with a level-1 heading carrying the directory's base
    name, followed by one section per exported file. The module globals
    ``project_path`` and ``project_name`` are updated for every project.
    """
    global project_path, project_name

    if isinstance(paths, (str, os.PathLike)):
        # Iterating a bare string would treat every character as a project.
        paths = [paths]

    with open(output_path, 'w', encoding='utf-8', errors='ignore') as output_file:
        for project_directory in paths:
            project_path = os.fspath(project_directory)
            # normpath drops a trailing separator, which would otherwise
            # leave the project name empty.
            project_name = os.path.basename(os.path.normpath(project_path))
            output_file.write(f"# 项目:{project_name}\n\n")
            process_directory(project_path, output_file, ignored_directories)


if __name__ == '__main__':
    project_dirs = [
        r'H:\下载\edge浏览器下载\spring-security-6.1.6',
    ]
    output_path = 'spring-security-6.1.6.md'  # where the knowledge base is written
    create_knowledge_base(project_dirs, output_path)
各函数说明:

- detect_encoding(file_path):检测并返回文件的编码类型。
- write_file_content_to_utf8(file_path, output_file):读取文件内容,根据文件扩展名决定是否加上 Markdown 代码块的格式,并写入到指定的输出文件中。
- should_ignore(directory, ignored_directories):判断给定的目录是否应该被忽略。
- process_directory(directory, output_file, ignored_directories):递归处理给定目录下的所有文件和子目录,转换为 Markdown 格式。
- create_knowledge_base(paths, output_path):遍历给定的项目目录列表,把每个项目依次写入同一个 Markdown 格式的知识库文件。
《把项目转换为md知识库,提供给gpts使用》由讯客互联创业栏目发布。感谢您对讯客互联的认可,以及对我们原创作品和文章的青睐。非常欢迎各位朋友分享到个人网站或者朋友圈,但转载请注明文章出处“把项目转换为md知识库,提供给gpts使用”。