Files
linebot_finance/app/invoice_fetcher.py
2026-03-09 00:52:51 +08:00

216 lines
7.2 KiB
Python
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import base64
import os
import requests
import anthropic
import urllib3
from datetime import datetime, timedelta
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker
# Silence urllib3 warnings process-wide; the HTTP requests in this file are
# made with verify=False.
urllib3.disable_warnings()
# Load settings from "../.env" (a relative path, so presumably resolved against
# the current working directory rather than this file's location -- confirm).
load_dotenv("../.env")
# Login credentials for the e-invoice platform and the Anthropic API key used
# for captcha solving; each is None when the variable is unset.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Local runs connect directly to localhost
DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# Module-level engine, session factory and declarative base used by the
# model and the persistence code in this file.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
class Transaction(Base):
    """ORM model mapped to the ``transactions`` table."""

    __tablename__ = "transactions"

    # Integer primary key, indexed.
    id = Column(
        Integer,
        index=True,
        primary_key=True,
    )
    # Free-form strings; this file stores "auto_import" as user_id and the
    # seller name as category for imported invoices.
    user_id = Column(String)
    category = Column(String)
    # Amount stored as a floating-point column.
    amount = Column(Float)
    # Optional note; this file stores the invoice number here and uses it
    # to detect invoices that were already imported.
    note = Column(String, nullable=True)
    # Defaults to the time returned by datetime.now when no value is given.
    created_at = Column(DateTime, default=datetime.now)
def solve_captcha(img_b64: str) -> str:
    """Ask the Anthropic API to read a captcha image and return its reply.

    Args:
        img_b64: Base64-encoded image data; it is sent declared as
            ``image/png``.

    Returns:
        The text of the first content block of the reply, with surrounding
        whitespace stripped.
    """
    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": img_b64,
        },
    }
    # The prompt (in Chinese) says the image is a captcha made of 5 digits
    # and asks for only those 5 digits, with no other text.
    prompt_part = {
        "type": "text",
        "text": "這是驗證碼圖片只有5個數字只回傳這5個數字不要其他任何文字",
    }
    user_message = {"role": "user", "content": [image_part, prompt_part]}

    api = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    reply = api.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[user_message],
    )
    first_block = reply.content[0]
    return first_block.text.strip()
async def login_and_get_token(headless: bool = False) -> str | None:
    """Log in to the e-invoice platform and return the session token.

    The flow is: open the login page in Chromium to obtain a
    ``login_challenge``, fetch a captcha and solve it with ``solve_captcha``,
    post the credentials, follow the returned redirect in the browser, and
    finally read ``token`` from the page's sessionStorage / localStorage.

    Args:
        headless: Whether to run Chromium without a visible window. Defaults
            to ``False`` (a visible window); pass ``True`` on machines that
            have no display.

    Returns:
        The token string, or ``None`` when the login response contains no
        ``redirectTo`` or when no ``token`` entry exists in either storage.

    Raises:
        requests.exceptions.RequestException: If a captcha/login HTTP call
            fails or exceeds the timeout.
    """
    from urllib.parse import parse_qs

    # Without a timeout, requests can block forever on a stalled connection.
    request_timeout = 30

    async with async_playwright() as p:
        # Launch exactly one browser; it is closed in the ``finally`` below on
        # every exit path, including exceptions.
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Load the login page to obtain login_challenge
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)
            url = page.url
            print(f"目前 URL: {url}")
            # Everything after the last "?" is parsed as a query string.
            fragment = url.split("?")[-1] if "?" in url else ""
            params = parse_qs(fragment)
            login_challenge = params.get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha
            # NOTE(review): TLS verification is disabled (verify=False) on the
            # calls below even though credentials are sent -- confirm this is
            # really required for these hosts.
            res = requests.get(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=request_timeout,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = solve_captcha(captcha_data["image"])
            print(f"驗證碼: {captcha_text}")

            # Log in
            res = requests.post(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=request_timeout,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")
            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the page stores the token in web storage
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)  # wait a bit longer
            url = page.url
            print(f"redirect 後 URL: {url}")

            # Inspect both localStorage and sessionStorage
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)
            await page.wait_for_timeout(3000)
            # NOTE(review): the prints below expose prefixes of stored values
            # and of the token; consider removing them outside of debugging.
            for key in session_keys:
                # Pass the key as an argument instead of splicing it into the
                # JavaScript source, so keys containing quotes cannot break
                # (or alter) the evaluated script.
                val = await page.evaluate(
                    "key => sessionStorage.getItem(key)", key
                )
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            await browser.close()
async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch the carrier invoices of the last ``days`` days.

    Two requests are made: the first exchanges the search criteria for a JWT,
    the second posts that JWT to the invoice search endpoint.

    Args:
        token: Bearer token obtained from the login step.
        days: Size of the search window, counted back from now.

    Returns:
        The list of invoice records found in the search response, or an
        empty list when the response contains none.

    Raises:
        requests.exceptions.HTTPError: If either endpoint answers with an
            HTTP error status.
        requests.exceptions.RequestException: On connection failures or
            timeouts.
    """
    from datetime import timezone

    # Without a timeout, requests can block forever on a stalled connection.
    request_timeout = 30

    # The timestamps are sent with a "Z" (UTC) suffix, so the window is
    # computed in UTC rather than in naive local time.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)

    # The format must include milliseconds
    def to_iso(dt):
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # NOTE(review): the original comment here says not to strip the "L" --
    # presumably the token must be sent exactly as received; confirm.
    headers = {"authorization": f"Bearer {token}"}

    # NOTE(review): TLS verification is disabled (verify=False) on both calls
    # although a bearer token is sent -- confirm this is really required.
    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=request_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    # Stop here on an error status instead of using the error body as a JWT.
    res.raise_for_status()
    # The body is the JWT itself, possibly wrapped in double quotes.
    jwt_token = res.text.strip().strip('"')

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=request_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    res.raise_for_status()

    payload = res.json()
    # NOTE(review): the original code referred to both "content" and
    # "invoices" as the list key; accept either until the real response
    # schema is confirmed.
    invoices = payload.get("content") or payload.get("invoices") or []
    print(f"拿到 {len(invoices)} 筆發票")
    return invoices
def save_invoices(invoices: list):
    """Store invoices as ``Transaction`` rows, skipping already-stored ones.

    An invoice counts as already stored when a row whose ``note`` equals its
    ``invoiceNumber`` exists. All new rows are committed together, a summary
    line is printed, and the session is always closed.

    Args:
        invoices: Mappings providing the keys ``invoiceNumber``,
            ``sellerName``, ``totalAmount`` and ``invoiceDate``.
    """
    session = SessionLocal()
    inserted = 0
    try:
        for invoice in invoices:
            duplicate = (
                session.query(Transaction)
                .filter(Transaction.note == invoice["invoiceNumber"])
                .first()
            )
            if not duplicate:
                seller = invoice["sellerName"]
                total = invoice["totalAmount"]
                number = invoice["invoiceNumber"]
                # fromisoformat is given an explicit "+00:00" offset in place
                # of a "Z" suffix.
                issued_at = datetime.fromisoformat(
                    invoice["invoiceDate"].replace("Z", "+00:00")
                )
                row = Transaction(
                    user_id="auto_import",
                    category=seller,
                    amount=total,
                    note=number,
                    created_at=issued_at,
                )
                session.add(row)
                inserted += 1
        session.commit()
        print(f"✅ 新增 {inserted} 筆,略過 {len(invoices) - inserted} 筆重複")
    finally:
        session.close()
async def main():
    """Run one import: log in, fetch invoices, list them, then store them."""
    print("開始抓取發票...")
    if not (token := await login_and_get_token()):
        # No token means there is nothing to fetch with.
        print("登入失敗")
        return

    invoices = await fetch_invoices(token)
    total_found = len(invoices)
    print(f"拿到 {total_found} 筆發票")
    for invoice in invoices:
        # Only the first 10 characters of the invoice date are shown.
        day = invoice['invoiceDate'][:10]
        seller = invoice['sellerName']
        amount = invoice['totalAmount']
        print(f" {day} {seller} ${amount}")
    save_invoices(invoices)
# Script entry point: run the async main() coroutine when this file is
# executed directly.
if __name__ == "__main__":
    asyncio.run(main())