# Reconstructed from the whitespace-collapsed patch of commit
# eca7a6190625b25cad0c19f51d78f195db797e4b ("init", Zichao Lin,
# Sat Nov 22 20:28:14 2025 +0800), which added this module as main.py.
#
# Other files added by the same commit (contents as shown in the patch):
#   .gitignore            -> ".venv/" and ".vecode/"
#                            NOTE(review): ".vecode/" is presumably a typo for
#                            ".vscode/" -- confirm before changing.
#   .vscode/settings.json -> {"python-envs.defaultEnvManager":
#                             "ms-python.python:venv",
#                             "python-envs.pythonProjects": []}
#   config.json           -> {"fund_codes": ["014847"]}
#   requirements.txt      -> binary file (contents not visible in the patch)
"""Download daily net-asset-value (NAV) history for funds from Sina Finance.

Fund codes are read from a JSON config file (``config.json`` by default).
Each fund's history is written to ``<output_dir>/<fund_code>.json`` as a
list of ``{"date", "NAVpU", "cNAVpU"}`` records sorted by ascending date.
"""

import json
import math
import os
import re
import time
from datetime import date
from typing import Any, Dict, List, Optional

import pandas as pd  # noqa: F401  (unused here; kept because the original module imports it)

try:
    import requests
except ImportError:
    # Keep the parsing/saving code importable without the HTTP dependency;
    # get_fund_data() reports the missing library when it is actually needed.
    requests = None

DEFAULT_CONFIG_FILE = "config.json"

# DOTALL: the JSONP body may contain newlines, which "." would otherwise
# refuse to match, making the whole payload look unparseable.
_JSONP_PATTERN = re.compile(r"xh5Fund\((\{.*\})\)", re.DOTALL)

# Fund codes end up in a URL and in a file name, so only plain
# alphanumerics/underscores are accepted (blocks "../" style traversal).
_FUND_CODE_PATTERN = re.compile(r"[0-9A-Za-z_]+")


def _format_date(raw: str) -> str:
    """Convert a ``YYYYMMDD`` string to ``YYYY-MM-DD``.

    Raises:
        ValueError: if *raw* is not exactly eight ASCII digits or does not
            name a real calendar date.
    """
    if len(raw) != 8 or not (raw.isascii() and raw.isdigit()):
        raise ValueError(f"日期格式无效: {raw!r}")
    return date(int(raw[:4]), int(raw[4:6]), int(raw[6:8])).isoformat()


def _parse_nav(raw: str) -> float:
    """Parse a NAV value, rejecting NaN/inf (they are not valid JSON numbers).

    Raises:
        ValueError: if *raw* is not a finite number.
    """
    value = float(raw)
    if not math.isfinite(value):
        raise ValueError(f"净值不是有限数值: {raw!r}")
    return value


class SinaFundDataFetcher:
    """Fetch raw NAV payloads from the Sina Finance JSONP endpoint."""

    def __init__(self):
        self.base_url = "https://finance.sina.com.cn/fund/api/xh5Fund/nav/{}.js"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    def get_fund_data(self, fund_code: str) -> Optional[Dict]:
        """Fetch and decode the payload for a single fund.

        Args:
            fund_code: Fund identifier substituted into the endpoint URL.

        Returns:
            The decoded JSON object, or ``None`` when the request fails, the
            HTTP status is not 200, or the body is not a JSONP-wrapped JSON
            object. Failures are reported on stdout, never raised.
        """
        if requests is None:
            print(f"获取基金数据时出错: {fund_code}, 错误: 未安装 requests 库")
            return None

        url = self.base_url.format(fund_code)
        try:
            response = requests.get(url, headers=self.headers, timeout=10)
        except requests.RequestException as e:
            print(f"获取基金数据时出错: {fund_code}, 错误: {e}")
            return None

        if response.status_code != 200:
            print(
                f"请求失败,状态码: {response.status_code}, 基金代码: {fund_code}"
            )
            return None

        response.encoding = "utf-8"
        json_str = self._parse_jsonp(response.text)
        if not json_str:
            print(f"解析JSONP失败: {fund_code}")
            return None

        try:
            data = json.loads(json_str)
        except ValueError as e:  # json.JSONDecodeError is a ValueError
            print(f"获取基金数据时出错: {fund_code}, 错误: {e}")
            return None

        if not isinstance(data, dict):
            # Callers index the result by key; anything else is unusable.
            print(f"解析JSONP失败: {fund_code}")
            return None
        return data

    def _parse_jsonp(self, jsonp_str: str) -> Optional[str]:
        """Return the JSON text inside ``xh5Fund(...)``, or ``None`` if absent."""
        match = _JSONP_PATTERN.search(jsonp_str)
        return match.group(1) if match else None


class FundDataManager:
    """Load fund codes from a config file and save each fund's NAV history."""

    def __init__(self, config_file: str = "config.json", output_dir: str = "output"):
        self.fetcher = SinaFundDataFetcher()
        self.config_file = config_file
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def load_config(self) -> List[str]:
        """Load the list of fund codes from the config file.

        Returns:
            Fund codes as stripped strings, de-duplicated in first-seen
            order. An empty list is returned (with a message on stdout) when
            the file is missing, unreadable, not valid JSON, or does not
            hold an object with a list under ``"fund_codes"``.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                config = json.load(f)
        except FileNotFoundError:
            print(f"配置文件 {self.config_file} 不存在")
            return []
        except (OSError, ValueError) as e:  # I/O, decoding or JSON errors
            print(f"读取配置文件失败: {e}")
            return []

        raw_codes = config.get("fund_codes", []) if isinstance(config, dict) else None
        if not isinstance(raw_codes, list):
            print("读取配置文件失败: fund_codes 必须是列表")
            return []

        codes = []
        for item in raw_codes:
            if isinstance(item, str):
                code = item.strip()
            elif isinstance(item, int) and not isinstance(item, bool):
                # Numeric entries are accepted, but JSON numbers cannot carry
                # leading zeros, so such codes must be quoted in the config.
                code = str(item)
            else:
                print(f"忽略无效的基金代码: {item!r}")
                continue
            if code:
                codes.append(code)

        # dict.fromkeys drops duplicates while preserving order.
        fund_codes = list(dict.fromkeys(codes))
        print(f"从配置文件中加载了 {len(fund_codes)} 个基金代码")
        return fund_codes

    @staticmethod
    def _parse_nav_records(data_str: str) -> List[Dict[str, Any]]:
        """Parse ``date,nav,cum_nav[,...]`` entries separated by ``#``.

        Entries with fewer than three fields are skipped silently; entries
        with an invalid date or a non-finite/non-numeric value are skipped
        with a message. The result is sorted by ascending date.
        """
        records = []
        for daily_data in data_str.split("#"):
            if not daily_data.strip():
                continue

            fields = [field.strip() for field in daily_data.split(",")]
            if len(fields) < 3:  # need date, unit NAV and cumulative NAV
                continue

            try:
                records.append(
                    {
                        "date": _format_date(fields[0]),  # YYYY-MM-DD
                        "NAVpU": _parse_nav(fields[1]),  # unit NAV
                        "cNAVpU": _parse_nav(fields[2]),  # cumulative unit NAV
                    }
                )
            except ValueError as e:
                print(f"解析数据失败: {daily_data}, 错误: {e}")

        # ISO dates sort chronologically as plain strings.
        records.sort(key=lambda record: record["date"])
        return records

    def parse_and_save_fund_data(self, fund_code: str) -> bool:
        """Fetch, parse and save the NAV history of one fund.

        Args:
            fund_code: Fund identifier; must consist only of ASCII letters,
                digits or underscores because it is used in the output path.

        Returns:
            ``True`` when at least one record was written to
            ``<output_dir>/<fund_code>.json``; ``False`` otherwise.
        """
        print(f"正在处理基金 {fund_code}...")

        if not isinstance(fund_code, str) or not _FUND_CODE_PATTERN.fullmatch(fund_code):
            print(f"无效的基金代码: {fund_code!r}")
            return False

        raw_data = self.fetcher.get_fund_data(fund_code)
        if not raw_data or "data" not in raw_data:
            print(f"未能获取基金 {fund_code} 的数据")
            return False

        data_str = raw_data["data"]
        formatted_data = (
            self._parse_nav_records(data_str) if isinstance(data_str, str) else []
        )
        if not formatted_data:
            print(f"基金 {fund_code} 没有有效数据")
            return False

        output_file = os.path.join(self.output_dir, f"{fund_code}.json")
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(formatted_data, f, ensure_ascii=False, indent=2)
        except OSError as e:
            print(f"保存文件失败: {e}")
            return False

        print(f"基金 {fund_code} 数据已保存至: {output_file}")
        print(f"共保存 {len(formatted_data)} 条记录")
        return True

    def process_all_funds(self, request_interval: float = 0.1) -> int:
        """Process every fund listed in the config file.

        Args:
            request_interval: Seconds to wait between consecutive funds so
                the endpoint is not hammered; ``0`` disables the pause.

        Returns:
            The number of funds that were saved successfully.
        """
        fund_codes = self.load_config()
        if not fund_codes:
            print("没有找到基金代码,请检查配置文件")
            return 0

        success_count = 0
        total_count = len(fund_codes)

        for i, fund_code in enumerate(fund_codes, 1):
            print(f"\n[{i}/{total_count}] ", end="")
            if self.parse_and_save_fund_data(fund_code):
                success_count += 1

            # No pause after the last fund.
            if i < total_count and request_interval > 0:
                time.sleep(request_interval)

        print(f"\n处理完成!成功: {success_count}/{total_count}")
        return success_count


def create_sample_config(config_file: str = DEFAULT_CONFIG_FILE):
    """Write a sample configuration file listing two fund codes.

    Args:
        config_file: Destination path; defaults to ``config.json``.
    """
    sample_config = {"fund_codes": ["014847", "014846"]}

    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(sample_config, f, ensure_ascii=False, indent=2)
    print("已创建示例配置文件 config.json")


def main():
    """Create a sample config on first run; otherwise process all funds."""
    if not os.path.exists(DEFAULT_CONFIG_FILE):
        print("配置文件 config.json 不存在,创建示例配置...")
        create_sample_config()
        return

    manager = FundDataManager()
    manager.process_all_funds()


if __name__ == "__main__":
    main()