# NOTE: the lines below appear to be repository-viewer page text that was
# captured together with the file; they are kept as comments so the module
# still parses as Python.
#
# Files
# linebot_finance/app/invoice_fetcher.py
# henry4682 1ea7feacf1 feat: invoice fetch
# 1. complete invoice fetch
# 2. remove ddddocr library
# 2026-03-09 11:36:08 +08:00
#
# 224 lines
# 7.6 KiB
# Python
# Raw Blame History
#
# This file contains ambiguous Unicode characters
# This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import base64
import os
import requests
import anthropic
import urllib3
from datetime import datetime, timedelta
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker
# Silence urllib3 warnings; the HTTP calls in this module pass verify=False,
# which would otherwise emit a warning on every request.
urllib3.disable_warnings()
# NOTE(review): "../.env" is a relative path, so it is presumably resolved
# against the current working directory rather than this file's location —
# confirm the script is always launched from the expected directory.
load_dotenv("../.env")
# Credentials and API key come from the environment; os.getenv returns None
# for any variable that is not set.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Connect directly to localhost when running locally (currently disabled).
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# engine = create_engine(DATABASE_URL)
# SessionLocal = sessionmaker(bind=engine)
# Declarative base class that the ORM models in this module inherit from.
Base = declarative_base()
class Transaction(Base):
    """ORM model mapped to the ``transactions`` table."""

    __tablename__ = "transactions"

    # Indexed integer primary key.
    id = Column(Integer, primary_key=True, index=True)
    user_id = Column(String)
    category = Column(String)
    amount = Column(Float)
    # Optional free-text note.
    note = Column(String, nullable=True)
    # The callable (not its result) is passed as the default, so the timestamp
    # is produced when a row is created; datetime.now() without a tz argument
    # yields a naive local time.
    created_at = Column(DateTime, default=datetime.now)
def solve_captcha(img_b64: str) -> str:
    """Ask the Anthropic vision model to read a numeric captcha image.

    Args:
        img_b64: Base64-encoded image data; it is sent with media type
            ``image/png``.

    Returns:
        The digits found in the model's reply, normalised to ASCII. If the
        reply contains no decimal digit at all, the stripped raw reply is
        returned unchanged.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": img_b64,
        },
    }
    prompt_part = {
        "type": "text",
        "text": "這是驗證碼圖片只有5個數字只回傳這5個數字不要其他任何文字",
    }

    msg = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{"role": "user", "content": [image_part, prompt_part]}],
    )
    reply = msg.content[0].text.strip()

    # The prompt asks for digits only, but a model reply may still carry
    # spaces, punctuation or full-width digits. Keep decimal characters only
    # and map each one to its ASCII form via int().
    digits = "".join(str(int(ch)) for ch in reply if ch.isdecimal())
    return digits or reply
async def login_and_get_token() -> str | None:
    """Log in to the e-invoice site and return the token the site stores.

    Steps:
        1. Open the login page in Chromium and read ``login_challenge`` from
           the resulting URL.
        2. Fetch a captcha from the captcha API and solve it with
           ``solve_captcha``.
        3. POST the credentials to ``doLogin`` and follow ``redirectTo`` in
           the browser.
        4. Read ``token`` from sessionStorage, falling back to localStorage.

    Returns:
        The token string, or None when the login response carries no
        ``redirectTo`` or no ``token`` entry exists in either storage.
    """
    from urllib.parse import parse_qs

    # requests has no default timeout; without one an unresponsive server
    # would block this coroutine forever.
    request_timeout = 30

    async with async_playwright() as p:
        # NOTE(review): headless=False opens a visible browser window, which
        # presumably requires a display — confirm before running on a server.
        browser = await p.chromium.launch(headless=False)
        try:
            # Load the login page to obtain login_challenge.
            page = await browser.new_page()
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)
            url = page.url
            print(f"目前 URL: {url}")

            # Everything after the last "?" is parsed as the query string.
            query = url.split("?")[-1] if "?" in url else ""
            params = parse_qs(query)
            login_challenge = params.get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha.
            # NOTE(review): verify=False disables TLS certificate checks on
            # calls that carry credentials — confirm this is still required.
            res = requests.get(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=request_timeout,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = solve_captcha(captcha_data["image"])
            print(f"驗證碼: {captcha_text}")

            # Log in.
            res = requests.post(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=request_timeout,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")
            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the site can store the token in web storage.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)  # wait a bit longer
            url = page.url
            print(f"redirect 後 URL: {url}")

            # Inspect both localStorage and sessionStorage.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)
            await page.wait_for_timeout(3000)

            # NOTE(review): these debug prints expose the first characters of
            # stored values (possibly secrets) on stdout.
            for key in session_keys:
                # Pass the key as an argument instead of interpolating it into
                # the JS source, so keys containing quotes cannot break it.
                val = await page.evaluate("key => sessionStorage.getItem(key)", key)
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            # Close the browser on every path, including when a step raises.
            await browser.close()
async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch the carrier invoices for the last ``days`` days.

    Two requests are made: the first exchanges the search criteria for a JWT,
    the second posts that JWT to the search endpoint.

    Args:
        token: Bearer token sent in the ``authorization`` header.
        days: Size of the search window, counted back from now.

    Returns:
        The ``content`` list of the search response. An empty list is
        returned when either request answers with a non-success status or the
        response has no usable ``content``.
    """
    # requests has no default timeout; without one an unresponsive server
    # would block forever.
    request_timeout = 30

    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    def to_iso(dt):
        # The timestamp must include milliseconds.
        # NOTE(review): dt is a naive local time but is labelled "Z" (UTC) —
        # confirm whether the API expects real UTC here.
        return dt.isoformat(timespec="milliseconds") + "Z"

    # Original author's note on this line: "do not strip the L" — presumably
    # the token must be sent exactly as received; TODO confirm.
    headers = {"authorization": f"Bearer {token}"}

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=request_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    if not res.ok:
        # An error body is not a JWT; do not forward it to the search call.
        return []

    # The body is the JWT itself, possibly wrapped in JSON string quotes.
    jwt_token = res.text.strip().strip('"')

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=request_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    if not res.ok:
        return []

    data = res.json()
    # Tolerate a non-object payload and a null "content" value.
    invoice_list = (data.get("content") if isinstance(data, dict) else None) or []
    print(f"拿到 {len(invoice_list)} 筆發票")
    return invoice_list
def save_invoices(invoices: list):
    """Print one summary line per invoice (dry run; nothing is persisted).

    The database code is kept commented out, so this only reports what would
    be added. Fields that are missing *or* null fall back to placeholders, so
    a single incomplete invoice does not abort the rest of the batch.

    Args:
        invoices: Iterable of dict-like invoices; the keys read are
            ``invoiceDate``, ``sellerName``, ``totalAmount`` and
            ``invoiceNumber``.
    """
    # db = SessionLocal()
    not_started = object()  # marks "no invoice has been reached yet"
    inv = not_started
    saved = 0
    try:
        for inv in invoices:
            # `or` also covers keys that are present with a null value, which
            # dict.get's default argument does not.
            inv_date = str(inv.get("invoiceDate") or "未知日期")
            seller = str(inv.get("sellerName") or "未知店家")
            amount = inv.get("totalAmount")
            if amount is None:
                amount = 0
            inv_num = inv.get("invoiceNumber") or "無號碼"

            # existing = db.query(Transaction).filter(
            #     Transaction.note == inv["invoiceNumber"]
            # ).first()
            # if existing:
            #     continue
            # db.add(Transaction(
            #     user_id="auto_import",
            #     category=inv["sellerName"],
            #     amount=inv["totalAmount"],
            #     note=inv["invoiceNumber"],
            #     created_at=datetime.fromisoformat(
            #         inv["invoiceDate"].replace("Z", "+00:00")
            #     )
            # ))

            # Aligned, human-readable output.
            print(f"新增發票 | 日期: {inv_date[:10]} | 店家: {seller[:15]:<15} | 金額: {amount:>6} | 號碼: {inv_num}")
            saved += 1
        # db.commit()
        print("-" * 30)
        print(f"✅ 模擬處理完成:預計新增 {saved} 筆,總計來源 {len(invoices)}")
    except Exception as e:
        print("❌ 儲存發票失敗:", e)
        # Show the offending invoice only if the loop reached one.
        if inv is not not_started:
            print(f"錯誤發票內容: {inv}")
        # db.rollback()
    # finally:
    #     db.close()
async def main():
    """Run the pipeline: log in, fetch the invoices, then hand them to save_invoices."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if token:
        invoices = await fetch_invoices(token)
        save_invoices(invoices)
    else:
        # No token means the login step did not succeed; nothing to fetch.
        print("登入失敗")


if __name__ == "__main__":
    asyncio.run(main())