Comparing QDII Fund Returns

Recently, QDII funds have been gradually reopening for subscription, so I wanted to compare the returns of several mainstream QDII funds tracking the Nasdaq-100 (NDX). I wrote a simple Python script that scrapes each fund's return figures from 天天基金网 (fund.eastmoney.com).

After running the script, I got return data for the funds below, using QQQ as the performance benchmark.
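
Once the script below has written fund_data.csv, the benchmark comparison can be made explicit by subtracting QQQ's return over each window. Here is a minimal sketch under that assumption; the qqq_returns figures are hand-filled placeholders (the script does not scrape QQQ), and the column names match what the script emits:

import pandas as pd

# Placeholder QQQ returns in percent; fill in real figures before use.
qqq_returns = {"近1月": 0.0, "近3月": 0.0, "近6月": 0.0, "近1年": 0.0, "近3年": 0.0}

df = pd.read_csv("fund_data.csv", encoding="gbk")
for window, benchmark_pct in qqq_returns.items():
    # Excess return of each fund over QQQ for the same window.
    df[window + "超额"] = pd.to_numeric(df[window], errors="coerce") - benchmark_pct

print(df[["基金名称", "近1年", "近1年超额"]].to_string(index=False))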

Overall, the differences in performance across these funds are small. With QDII funds broadly subject to purchase limits right now, being able to buy in at all is already a win; it's enough to steer clear of the few underperformers, as the sketch below shows.
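
To surface those laggards, here is a small sketch (again assuming the fund_data.csv produced by the script below) that sorts funds by one-year return, worst first:

import pandas as pd

df = pd.read_csv("fund_data.csv", encoding="gbk")
df["近1年"] = pd.to_numeric(df["近1年"], errors="coerce")
# Worst one-year performers first, so the funds to avoid appear at the top.
print(df.sort_values("近1年")[["基金名称", "日期", "近1年", "状态"]].head(10).to_string(index=False))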

Code snippet

import asyncio
import re

import aiohttp
import pandas as pd
from bs4 import BeautifulSoup

# Matches return entries such as "近1月:1.23%" (the page uses a full-width colon).
pl_pattern = r"(近1月|近1年|近3月|近3年|近6月|成立来):(-?\d+(\.\d+)?)%"
# Matches the net-asset-value date, e.g. "2024-01-31".
date_pattern = r"\d{4}-\d{2}-\d{2}"
# Fund codes to scrape (基金编号 = "fund codes").
基金编号 = [
    "270042", "000834", "161130", "040048", "014978", "159501", "012752", "040047", "006480", "016532",
    "016055", "016057", "019548", "019737", "018966", "019736", "021000", "016535", "012753", "019738",
    "021838", "019525", "160213", "040046", "006479", "539001", "016533", "159513", "513870", "015300",
    "019172", "019173", "019442", "012870", "012871", "012751", "015518", "016056", "018969", "022525",
    "019175", "019739", "513100", "159941", "513300", "159632", "159660", "159659", "003722", "018044",
    "513110", "015299", "159696", "018043", "018967", "019441", "019524", "019547", "000055", "021773",
    "016534", "016058", "019174", "022664", "023422",
]
# Fund detail page on 天天基金网.
endpoint = "https://fund.eastmoney.com/{code}.html"

async def fetch(session, url):
    async with session.get(url) as response:
        response.raise_for_status()
        text = await response.text()
        soup = BeautifulSoup(text, "html.parser")

        # Block containing the headline return percentages.
        value_element = soup.select_one(
            "#body > div:nth-child(11) > div > div > div.fundDetail-main > div.fundInfoItem > div.dataOfFund"
        )
        # Fund name in the page header.
        title_element = soup.select_one(
            "#body > div:nth-child(11) > div > div > div.fundDetail-header > div.fundDetail-tit > div"
        )
        # Purchase status; the wrapper class differs by sale state, so try both selectors.
        status_element = soup.select_one(
            "#body > div:nth-child(11) > div > div > div.fundDetail-main > div.choseBuyWay.notForSale > div.buyWayWrap > div.buyWayStatic > div:nth-child(1),"
            "#body > div:nth-child(11) > div > div > div.fundDetail-main > div.choseBuyWay.canBuy > div.buyWayWrap > div.buyWayStatic > div:nth-child(1)"
        )
        # Date of the latest published net asset value.
        date_element = soup.select_one(
            "#body > div:nth-child(11) > div > div > div.fundDetail-main > div.fundInfoItem > div.dataOfFund > dl.dataItem02 > dt > p"
        )
        if value_element is not None and title_element is not None:
            matches = re.findall(pl_pattern, value_element.get_text(strip=True))
            value = {}
            title = title_element.get_text(strip=True)
            # date_element is missing on some layouts; guard before dereferencing it.
            date = (
                re.findall(date_pattern, date_element.get_text(strip=True))
                if date_element is not None
                else []
            )
            print(title, date)
            for category, percentage, _ in matches:
                value[category] = percentage
            if status_element is not None:
                value["状态"] = status_element.get_text(strip=True)  # purchase status
            if date:
                value["日期"] = date[0]  # NAV date
            return title, value
        raise ValueError(f"no valid elements found at {url}")

async def fetch_all(urls):
    # Fetch all pages concurrently; return_exceptions=True keeps one failed
    # request from aborting the whole batch.
    async with aiohttp.ClientSession() as session:
        tasks = [fetch(session, url) for url in urls]
        return await asyncio.gather(*tasks, return_exceptions=True)

# Output columns: fund name (基金名称), NAV date (日期), trailing returns, purchase status (状态).
columns = [
    "基金名称",
    "日期",
    "近1月",
    "近3月",
    "近6月",
    "近1年",
    "近3年",
    "状态",
]

# Build the list of detail-page URLs to scrape.
urls = [endpoint.format(code=code) for code in 基金编号]

# Scrape all pages and collect one row per fund.
rows = []
results = asyncio.run(fetch_all(urls))
for result in results:
    if isinstance(result, BaseException):
        # gather(return_exceptions=True) hands failures back as exception objects.
        print(result)
    else:
        title, values = result
        # Skip USD share classes and exchange-traded (场内) listings.
        if "美元" in title or "场内" in values.get("状态", ""):
            continue
        row = {col: values.get(col, "") for col in columns[1:]}
        row["基金名称"] = title
        rows.append(row)

# Sort rows by fund name and build the final DataFrame.
rows.sort(key=lambda item: item["基金名称"])
df = pd.DataFrame(rows, columns=columns)

# Write the CSV in GBK (e.g. so it opens cleanly in Excel on a Chinese-locale system).
df.to_csv("fund_data.csv", index=False, encoding="gbk")

print("数据已成功保存到 fund_data.csv")