"""Download fund net-asset-value (NAV) history from Sina Finance and save it as JSON.

The script reads a list of fund codes from ``config.json``, fetches the JSONP
payload for each code, converts the ``#``/``,``-delimited history string into
records, and writes one ``<fund_code>.json`` file per fund to the output
directory.
"""
import json
import os
import re
import time
from datetime import datetime
from typing import Dict, List, Optional

import pandas as pd  # noqa: F401  (not referenced in this module; kept from the original import list)
import requests

# Extracts the JSON object from the ``xh5Fund({...})`` JSONP wrapper.
# DOTALL lets the payload span several lines; without it ``.`` stops at the
# first newline and a multi-line response would never match.
_JSONP_RE = re.compile(r"xh5Fund\(({.*})\)", re.DOTALL)

# A date field must begin with exactly eight ASCII digits (YYYYMMDD).
_DATE_PREFIX_RE = re.compile(r"[0-9]{8}")


def _format_date(raw: str) -> str:
    """Convert a ``YYYYMMDD...`` field into ``YYYY-MM-DD``.

    Only the first eight characters are used, so longer values are still
    accepted.

    Raises:
        ValueError: if the first eight characters are not digits or do not
            form a real calendar date.
    """
    prefix = raw[:8]
    if not _DATE_PREFIX_RE.fullmatch(prefix):
        raise ValueError(f"invalid date field: {raw!r}")
    # strptime rejects impossible dates such as month 13 or day 45.
    return datetime.strptime(prefix, "%Y%m%d").date().isoformat()


class SinaFundDataFetcher:
    """Fetch raw fund data from the Sina Finance JSONP endpoint."""

    def __init__(self):
        self.base_url = "https://finance.sina.com.cn/fund/api/xh5Fund/nav/{}.js"
        self.headers = {
            "User-Agent": "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36"
        }

    def get_fund_data(self, fund_code: str) -> Optional[Dict]:
        """Fetch and decode the data for a single fund.

        Args:
            fund_code: Fund code substituted into the endpoint URL.

        Returns:
            The decoded JSON payload, or ``None`` when the request fails, the
            status code is not 200, or the body cannot be parsed. Failures are
            reported on stdout rather than raised.
        """
        url = self.base_url.format(fund_code)
        try:
            response = requests.get(url, headers=self.headers, timeout=10)
            response.encoding = "utf-8"
            if response.status_code != 200:
                print(
                    f"请求失败,状态码: {response.status_code}, 基金代码: {fund_code}"
                )
                return None

            json_str = self._parse_jsonp(response.text)
            if not json_str:
                print(f"解析JSONP失败: {fund_code}")
                return None

            return json.loads(json_str)
        except (requests.RequestException, ValueError) as e:
            # RequestException covers network/HTTP errors; ValueError covers
            # json.JSONDecodeError for a malformed payload.
            print(f"获取基金数据时出错: {fund_code}, 错误: {e}")
            return None

    def _parse_jsonp(self, jsonp_str: str) -> Optional[str]:
        """Return the JSON text wrapped by ``xh5Fund(...)``, or ``None`` if absent."""
        match = _JSONP_RE.search(jsonp_str)
        return match.group(1) if match else None


class FundDataManager:
    """Load fund codes from a config file and save each fund's NAV history."""

    def __init__(self, config_file: str = "config.json", output_dir: str = "output"):
        """Store the paths and create ``output_dir`` if it does not exist."""
        self.fetcher = SinaFundDataFetcher()
        self.config_file = config_file
        self.output_dir = output_dir
        os.makedirs(output_dir, exist_ok=True)

    def load_config(self) -> List[str]:
        """Load the fund code list from the config file.

        Returns:
            The non-empty fund codes as strings, or ``[]`` when the file is
            missing, unreadable, not valid JSON, or ``fund_codes`` is not a
            list.
        """
        try:
            with open(self.config_file, "r", encoding="utf-8") as f:
                config = json.load(f)
        except FileNotFoundError:
            print(f"配置文件 {self.config_file} 不存在")
            return []
        except (OSError, ValueError) as e:
            print(f"读取配置文件失败: {e}")
            return []

        raw_codes = config.get("fund_codes", []) if isinstance(config, dict) else None
        if not isinstance(raw_codes, list):
            # A bare string here would otherwise be iterated character by
            # character and treated as many one-letter fund codes.
            print("读取配置文件失败: fund_codes 必须是列表")
            return []

        fund_codes = [str(code).strip() for code in raw_codes]
        fund_codes = [code for code in fund_codes if code]
        print(f"从配置文件中加载了 {len(fund_codes)} 个基金代码")
        return fund_codes

    @staticmethod
    def _parse_nav_records(data_str: str) -> List[Dict]:
        """Parse a ``#``-separated history string into NAV records.

        Each entry is ``date,unit_nav,cumulative_nav[,...]``. Entries with
        fewer than three fields are skipped silently; entries whose date or
        numbers cannot be parsed are reported on stdout and skipped.
        """
        records = []
        for daily_data in data_str.split("#"):
            if not daily_data.strip():
                continue
            # Strip every field so surrounding whitespace cannot shift the
            # date slice (float() tolerates whitespace, the date does not).
            fields = [field.strip() for field in daily_data.split(",")]
            if len(fields) < 3:  # need date, unit NAV and cumulative NAV
                continue
            try:
                records.append(
                    {
                        "date": _format_date(fields[0]),  # YYYY-MM-DD
                        "NAVpU": float(fields[1]),  # unit NAV
                        "cNAVpU": float(fields[2]),  # cumulative unit NAV
                    }
                )
            except ValueError as e:
                print(f"解析数据失败: {daily_data}, 错误: {e}")
        return records

    def parse_and_save_fund_data(self, fund_code: str) -> bool:
        """Fetch, parse and save the data for a single fund.

        Returns:
            ``True`` when at least one record was written to
            ``<output_dir>/<fund_code>.json``; ``False`` when the fetch
            failed, no valid record was found, or the file could not be
            written.
        """
        print(f"正在处理基金 {fund_code}...")
        raw_data = self.fetcher.get_fund_data(fund_code)

        # The payload must be a dict whose "data" entry is a string; anything
        # else (None, a list, a null "data") counts as "no data".
        data_str = raw_data.get("data") if isinstance(raw_data, dict) else None
        if not isinstance(data_str, str):
            print(f"未能获取基金 {fund_code} 的数据")
            return False

        formatted_data = self._parse_nav_records(data_str)
        if not formatted_data:
            print(f"基金 {fund_code} 没有有效数据")
            return False

        # ISO dates sort chronologically as plain strings.
        formatted_data.sort(key=lambda record: record["date"])

        output_file = os.path.join(self.output_dir, f"{fund_code}.json")
        try:
            with open(output_file, "w", encoding="utf-8") as f:
                json.dump(formatted_data, f, ensure_ascii=False, indent=2)
        except OSError as e:
            print(f"保存文件失败: {e}")
            return False

        print(f"基金 {fund_code} 数据已保存至: {output_file}")
        print(f"共保存 {len(formatted_data)} 条记录")
        return True

    def process_all_funds(self, delay: float = 0.1):
        """Process every fund listed in the config file.

        Args:
            delay: Seconds to wait between two consecutive funds; values
                ``<= 0`` disable the pause.
        """
        fund_codes = self.load_config()
        if not fund_codes:
            print("没有找到基金代码,请检查配置文件")
            return

        success_count = 0
        total_count = len(fund_codes)
        for i, fund_code in enumerate(fund_codes, 1):
            print(f"\n[{i}/{total_count}] ", end="")
            if self.parse_and_save_fund_data(fund_code):
                success_count += 1
            # No pause after the last fund.
            if i < total_count and delay > 0:
                time.sleep(delay)

        print(f"\n处理完成!成功: {success_count}/{total_count}")


def create_sample_config(config_file: str = "config.json"):
    """Write a sample config file containing two example fund codes.

    Args:
        config_file: Destination path; an existing file is overwritten.
    """
    sample_config = {"fund_codes": ["014847", "014846"]}
    with open(config_file, "w", encoding="utf-8") as f:
        json.dump(sample_config, f, ensure_ascii=False, indent=2)
    print("已创建示例配置文件 config.json")


def main():
    """Create a sample config on first run, otherwise process all funds."""
    # On first run only the sample config is written; the user is expected to
    # edit it and run the script again.
    if not os.path.exists("config.json"):
        print("配置文件 config.json 不存在,创建示例配置...")
        create_sample_config()
        return

    manager = FundDataManager()
    manager.process_all_funds()


if __name__ == "__main__":
    main()