"""Import e-invoice carrier records into the local ``transactions`` table.

Flow: open the e-invoice login page in Chromium to obtain a login challenge,
solve the captcha with an Anthropic model, log in through the HTTP API,
read the resulting token from browser storage, query the invoice list and
store every invoice that is not already present in the database.
"""
import asyncio
import base64
import os
from datetime import datetime, timedelta, timezone
from urllib.parse import parse_qs

import anthropic
import requests
import urllib3
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker

# NOTE(review): every HTTP call below passes verify=False, so credentials and
# tokens travel over a TLS connection whose certificate is not validated.
# The warning is silenced here; prefer supplying a CA bundle if possible.
urllib3.disable_warnings()

load_dotenv("../.env")

EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

# When running locally, connect straight to the database on localhost.
DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")

# Seconds to wait for any single HTTP request before giving up, so a stalled
# connection cannot hang the script forever.
REQUEST_TIMEOUT = 30

engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()


class Transaction(Base):
    """ORM model for one row of the ``transactions`` table."""

    __tablename__ = "transactions"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(String)
    category = Column(String)
    amount = Column(Float)
    # save_invoices() stores the invoice number here and uses it to
    # detect invoices that were already imported.
    note = Column(String, nullable=True)
    created_at = Column(DateTime, default=datetime.now)


def solve_captcha(img_b64: str) -> str:
    """Ask the Anthropic model to read a captcha image.

    Args:
        img_b64: Base64-encoded image data; it is sent with media type
            ``image/png``.

    Returns:
        The text of the first content block of the reply, stripped of
        surrounding whitespace.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    msg = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": img_b64,
                    },
                },
                {
                    "type": "text",
                    "text": "這是驗證碼圖片,只有5個數字,只回傳這5個數字,不要其他任何文字",
                },
            ],
        }],
    )
    return msg.content[0].text.strip()


async def login_and_get_token(headless: bool = False) -> str | None:
    """Log in to the e-invoice site and return the token kept in browser storage.

    Args:
        headless: Whether Chromium runs without a visible window. The default
            keeps the browser visible.

    Returns:
        The ``token`` entry from sessionStorage (falling back to
        localStorage), or ``None`` when the login response has no
        ``redirectTo`` or no token is stored after the redirect.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Load the login page to obtain the login_challenge parameter.
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"目前 URL: {url}")

            # Everything after the last "?" is parsed as the query string.
            query = url.split("?")[-1] if "?" in url else ""
            params = parse_qs(query)
            login_challenge = params.get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha image and have it solved.
            res = requests.get(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=REQUEST_TIMEOUT,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = solve_captcha(captcha_data["image"])
            print(f"驗證碼: {captcha_text}")

            # Submit the login request.
            res = requests.post(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=REQUEST_TIMEOUT,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")

            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the site can store the token in
            # browser storage; wait generously for its scripts to run.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"redirect 後 URL: {url}")

            # Inspect both localStorage and sessionStorage.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)

            await page.wait_for_timeout(3000)
            for key in session_keys:
                # Pass the key as an argument instead of splicing it into the
                # script text, so keys containing quotes cannot break the JS.
                val = await page.evaluate(
                    "key => sessionStorage.getItem(key)", key
                )
                print(f"  session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            # Close the browser on every exit path, including exceptions.
            await browser.close()


def _extract_invoices(payload) -> list:
    """Return the invoice list contained in a decoded search response."""
    if isinstance(payload, list):
        return payload
    if isinstance(payload, dict):
        # NOTE(review): the response schema is not confirmed; earlier code
        # referred to both "invoices" and "content". Verify against a real
        # response and keep only the correct key.
        for key in ("invoices", "content"):
            value = payload.get(key)
            if isinstance(value, list):
                return value
    return []


async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch carrier invoices for the last ``days`` days.

    Args:
        token: Login token, sent as ``Bearer <token>`` in the
            ``authorization`` header.
        days: Length of the search window, ending now.

    Returns:
        The list of invoice records found in the response, or an empty list
        when none can be located.
    """
    # The timestamps carry a "Z" (UTC) suffix, so they must be built from
    # UTC time rather than naive local time.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)

    def to_iso(dt):
        # The format must include milliseconds.
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # NOTE(review): an earlier note here said "do not strip" — presumably the
    # token must be passed through unmodified; confirm.
    headers = {"authorization": f"Bearer {token}"}

    # Step 1: exchange the search criteria for a query JWT.
    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    # The body is used as the JWT after removing surrounding whitespace
    # and double quotes.
    jwt_token = res.text.strip().strip('"')

    # Step 2: run the search with that JWT.
    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")

    return _extract_invoices(res.json())


def save_invoices(invoices: list):
    """Insert invoices that are not yet stored, keyed by invoice number.

    Args:
        invoices: Records providing ``invoiceNumber``, ``sellerName``,
            ``totalAmount`` and ``invoiceDate`` (an ISO-8601 string; a
            trailing ``Z`` is accepted).
    """
    saved = 0
    # The context manager closes the session on every exit path.
    with SessionLocal() as db:
        for inv in invoices:
            existing = db.query(Transaction).filter(
                Transaction.note == inv["invoiceNumber"]
            ).first()
            if existing:
                continue
            db.add(Transaction(
                user_id="auto_import",
                category=inv["sellerName"],
                amount=inv["totalAmount"],
                note=inv["invoiceNumber"],
                created_at=datetime.fromisoformat(
                    inv["invoiceDate"].replace("Z", "+00:00")
                ),
            ))
            saved += 1
        db.commit()
        print(f"✅ 新增 {saved} 筆,略過 {len(invoices) - saved} 筆重複")


async def main():
    """Log in, fetch recent invoices, print them and store the new ones."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if not token:
        print("登入失敗")
        return

    invoices = await fetch_invoices(token)
    print(f"拿到 {len(invoices)} 筆發票")
    for inv in invoices:
        print(f"  {inv['invoiceDate'][:10]} {inv['sellerName']} ${inv['totalAmount']}")

    save_invoices(invoices)


if __name__ == "__main__":
    asyncio.run(main())