"""Fetch recent carrier invoices from einvoice.nat.gov.tw and print them.

Flow: open the login page in Chromium (Playwright) to obtain a
``login_challenge``, solve the captcha image with an Anthropic model, log in
through the site's JSON API, read the session token out of browser storage,
then query the invoice list with that token.

Database persistence is currently disabled (the SQLAlchemy session code is
commented out); ``save_invoices`` only prints what would be stored.
"""
import asyncio
import base64
import os
from datetime import datetime, timedelta
from urllib.parse import parse_qs

import anthropic
import requests
import urllib3
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import (
    create_engine,
    Column,
    Integer,
    String,
    Float,
    DateTime,
    text,
)
from sqlalchemy.orm import declarative_base, sessionmaker

# Every HTTP call below uses verify=False, so silence the warnings that
# urllib3 would otherwise emit for each unverified request.
# NOTE(review): verify=False disables TLS certificate checks while sending
# credentials -- confirm this is still required for these hosts.
urllib3.disable_warnings()
load_dotenv("../.env")

EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

# Seconds to wait on a single HTTP request before giving up; without a
# timeout, requests can block forever on an unresponsive server.
REQUEST_TIMEOUT = 30

# Connect directly to localhost when running locally (currently disabled).
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# engine = create_engine(DATABASE_URL)
# SessionLocal = sessionmaker(bind=engine)

Base = declarative_base()


class Transaction(Base):
    """ORM mapping for a row of the ``transactions`` table."""

    __tablename__ = "transactions"

    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(String)
    category = Column(String)
    amount = Column(Float)
    note = Column(String, nullable=True)
    # Defaults to the (naive, local) time at which the row is created.
    created_at = Column(DateTime, default=datetime.now)


def solve_captcha(img_b64: str) -> str:
    """Ask an Anthropic model to read the digits in a captcha image.

    Args:
        img_b64: Base64-encoded image data, sent to the API as ``image/png``.

    Returns:
        The text of the first content item in the model's reply, stripped
        of surrounding whitespace.  The prompt asks for exactly five digits,
        but the reply is not validated here.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    msg = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image",
                    "source": {
                        "type": "base64",
                        "media_type": "image/png",
                        "data": img_b64,
                    },
                },
                {
                    "type": "text",
                    "text": "這是驗證碼圖片,只有5個數字,只回傳這5個數字,不要其他任何文字",
                },
            ],
        }],
    )
    return msg.content[0].text.strip()


async def login_and_get_token() -> str | None:
    """Log in to the e-invoice site and return the session token.

    Opens a visible Chromium window, extracts ``login_challenge`` from the
    login page URL, solves the captcha, posts the credentials, then follows
    the returned redirect so the site stores its token in browser storage.

    Returns:
        The value of the ``token`` item from sessionStorage (falling back to
        localStorage), or ``None`` when the login response carries no
        ``redirectTo`` or no token is found in storage.

    Note:
        The ``requests`` calls are blocking and run on the event-loop
        thread; that is harmless here because nothing else runs concurrently.
    """
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        # try/finally guarantees the browser is closed even when a request,
        # JSON decode or page navigation raises part-way through.
        try:
            page = await browser.new_page()

            # Load the login page to obtain login_challenge.
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"目前 URL: {url}")

            # Everything after the last "?" is parsed as the query string.
            # Deliberately not urlsplit(): this also works if the "?" sits
            # inside a "#..." fragment.
            query_string = url.split("?")[-1] if "?" in url else ""
            params = parse_qs(query_string)
            login_challenge = params.get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha image and its token.
            res = requests.get(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=REQUEST_TIMEOUT,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = solve_captcha(captcha_data["image"])
            print(f"驗證碼: {captcha_text}")

            # Log in.
            res = requests.post(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=REQUEST_TIMEOUT,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")

            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the site stores the token in browser
            # storage; wait generously for its scripts to finish.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"redirect 後 URL: {url}")

            # Dump the keys of both localStorage and sessionStorage.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)

            await page.wait_for_timeout(3000)

            for key in session_keys:
                # Pass the key as an argument instead of splicing it into
                # the JS source, so keys containing quotes cannot break
                # (or inject into) the evaluated expression.
                val = await page.evaluate(
                    "key => sessionStorage.getItem(key)", key
                )
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            await browser.close()


async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch the carrier invoices issued in the last ``days`` days.

    Two POSTs are made: the first exchanges the search criteria for a JWT,
    the second submits that JWT to retrieve the invoices.

    Args:
        token: Session token, sent as ``authorization: Bearer <token>``.
        days: Size of the search window, counted back from now.

    Returns:
        The ``content`` entry of the search response, or an empty list when
        it is missing, null, or the response is not a JSON object.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # The timestamp format must include milliseconds.
    # NOTE(review): datetime.now() is naive local time, yet the string is
    # suffixed with "Z" (the UTC designator) -- confirm what the API expects.
    def to_iso(dt):
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # Original note said "do not strip" -- presumably the "Bearer " prefix.
    headers = {"authorization": f"Bearer {token}"}

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")

    # The body is used as the JWT after trimming whitespace and any
    # surrounding double quotes.
    jwt_token = res.text.strip().strip('"')

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")

    data = res.json()
    # `.get("content", [])` alone would return None for `"content": null`
    # and fail on a non-dict payload; normalise both cases to [].
    content = data.get("content") if isinstance(data, dict) else None
    invoice_list = content or []
    print(f"拿到 {len(invoice_list)} 筆發票")
    return invoice_list


def _invoice_field(inv: dict, key: str, default):
    """Return ``inv[key]``, or ``default`` when the key is missing or None."""
    value = inv.get(key)
    return default if value is None else value


def save_invoices(invoices: list) -> None:
    """Print each invoice as it would be stored (dry run; DB code disabled).

    Args:
        invoices: Invoice dicts; the keys ``invoiceDate``, ``sellerName``,
            ``totalAmount`` and ``invoiceNumber`` are read, each falling
            back to a placeholder when missing or null.

    Any exception stops the run and is reported together with the invoice
    being processed; nothing is re-raised.
    """
    # db = SessionLocal()
    saved = 0
    inv = None  # last invoice touched, kept for the error report below
    try:
        for inv in invoices:
            # str() keeps the slicing below safe for non-string values; the
            # helper covers explicit nulls, which dict.get's default does not.
            inv_date = str(_invoice_field(inv, "invoiceDate", "未知日期"))
            seller = str(_invoice_field(inv, "sellerName", "未知店家"))
            amount = _invoice_field(inv, "totalAmount", 0)
            inv_num = _invoice_field(inv, "invoiceNumber", "無號碼")

            # Disabled persistence path: skip invoices already stored
            # (matched on note == invoice number), otherwise insert.
            # existing = db.query(Transaction).filter(
            #     Transaction.note == inv["invoiceNumber"]
            # ).first()
            # if existing:
            #     continue
            # db.add(Transaction(
            #     user_id="auto_import",
            #     category=inv["sellerName"],
            #     amount=inv["totalAmount"],
            #     note=inv["invoiceNumber"],
            #     created_at=datetime.fromisoformat(
            #         inv["invoiceDate"].replace("Z", "+00:00")
            #     )
            # ))

            # Aligned, human-readable output.
            print(f"新增發票 | 日期: {inv_date[:10]} | 店家: {seller[:15]:<15} | 金額: {amount:>6} | 號碼: {inv_num}")
            saved += 1

        # db.commit()
        print("-" * 30)
        print(f"✅ 模擬處理完成:預計新增 {saved} 筆,總計來源 {len(invoices)} 筆")
    except Exception as e:
        # Top-level reporting boundary: show what failed and on which item.
        print("❌ 儲存發票失敗:", e)
        if inv is not None:
            print(f"錯誤發票內容: {inv}")
        # db.rollback()
    # finally:
    #     db.close()


async def main():
    """Log in, fetch the recent invoices and print them."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if not token:
        print("登入失敗")
        return
    invoices = await fetch_invoices(token)
    save_invoices(invoices)


if __name__ == "__main__":
    asyncio.run(main())