add app files

This commit is contained in:
2026-03-09 00:52:51 +08:00
parent 9b4c7cfeda
commit 423d7a573c
10 changed files with 1906 additions and 0 deletions

216
app/invoice_fetcher.py Normal file
View File

@@ -0,0 +1,216 @@
import asyncio
import base64
import os
import requests
import anthropic
import urllib3
from datetime import datetime, timedelta
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker
# Silence urllib3 warnings process-wide. The HTTP calls in this file pass
# verify=False, which would otherwise emit a warning on every request.
urllib3.disable_warnings()
# Load environment variables from ../.env.
# NOTE(review): the path is relative — presumably resolved against the
# current working directory rather than this file's location; confirm.
load_dotenv("../.env")
# Credentials for the e-invoice site and the Anthropic API key.
# Each is None when the corresponding environment variable is unset.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Connect directly to the local (localhost) database.
DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# The engine, session factory and declarative base are created at import time.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
class Transaction(Base):
    """ORM model mapped to the ``transactions`` table."""

    __tablename__ = "transactions"
    # Integer primary key (also indexed).
    id = Column(Integer, primary_key=True, index=True)
    # Owner of the row; save_invoices() writes the literal "auto_import".
    user_id = Column(String)
    # Free-form category; save_invoices() stores the invoice's sellerName here.
    category = Column(String)
    # Transaction amount; save_invoices() stores the invoice's totalAmount.
    amount = Column(Float)
    # Optional note; save_invoices() stores the invoice number here and uses
    # it to detect already-imported invoices.
    note = Column(String, nullable=True)
    # Defaults to datetime.now (called at insert time) when not supplied.
    created_at = Column(DateTime, default=datetime.now)
def solve_captcha(img_b64: str) -> str:
    """Ask the Claude model to read the digits shown in a captcha image.

    Args:
        img_b64: Base64-encoded image data; it is submitted with the media
            type ``image/png``.

    Returns:
        The text of the first content block of the model's reply, with
        leading and trailing whitespace removed.
    """
    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": img_b64,
        },
    }
    # The (Chinese) prompt states the image contains exactly five digits and
    # asks the model to answer with those five digits and nothing else.
    prompt_part = {
        "type": "text",
        "text": "這是驗證碼圖片只有5個數字只回傳這5個數字不要其他任何文字",
    }
    reply = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY).messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{"role": "user", "content": [image_part, prompt_part]}],
    )
    first_block = reply.content[0]
    return first_block.text.strip()
async def login_and_get_token(
    *, headless: bool = False, http_timeout: float = 30.0
) -> str | None:
    """Log in to the e-invoice site and return the session token.

    The flow is:

    1. Open the login page in Chromium and read ``login_challenge`` from the
       query string of the URL the browser ends up on.
    2. Download a captcha, have ``solve_captcha`` read it, and POST the
       credentials to the login endpoint.
    3. Follow the ``redirectTo`` URL in the browser and read ``token`` from
       ``sessionStorage`` (falling back to ``localStorage``).

    Args:
        headless: Whether to launch Chromium without a window. Defaults to
            ``False`` (a visible browser window).
        http_timeout: Timeout in seconds applied to each ``requests`` call so
            an unresponsive server cannot hang the coroutine forever.

    Returns:
        The token string, or ``None`` when the login response has no
        ``redirectTo`` or no token is found in browser storage.
    """
    from urllib.parse import parse_qs

    async with async_playwright() as p:
        # Launch exactly one browser and always close it, even on errors.
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Load the login page to obtain login_challenge.
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)
            url = page.url
            print(f"目前 URL: {url}")
            query = url.split("?")[-1] if "?" in url else ""
            login_challenge = parse_qs(query).get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha. requests is blocking, so run it in a worker
            # thread instead of stalling the event loop.
            # NOTE(review): verify=False disables TLS certificate checks on a
            # request flow that carries the password — confirm this is needed.
            res = await asyncio.to_thread(
                requests.get,
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=http_timeout,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = await asyncio.to_thread(
                solve_captcha, captcha_data["image"]
            )
            print(f"驗證碼: {captcha_text}")

            # Log in.
            res = await asyncio.to_thread(
                requests.post,
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=http_timeout,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")
            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the site stores the token in web storage.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)  # wait a bit longer
            url = page.url
            print(f"redirect 後 URL: {url}")

            # Inspect both localStorage and sessionStorage.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)
            await page.wait_for_timeout(3000)
            for key in session_keys:
                # Pass the key as an argument rather than splicing it into the
                # script, so keys containing quotes cannot break the JS.
                val = await page.evaluate(
                    "key => sessionStorage.getItem(key)", key
                )
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            await browser.close()
async def fetch_invoices(
    token: str, days: int = 7, *, http_timeout: float = 30.0
) -> list:
    """Fetch carrier invoices for the last ``days`` days.

    Two requests are made: the first exchanges the search criteria for a JWT,
    the second submits that JWT to retrieve the invoice list.

    Args:
        token: Bearer token obtained from the login flow.
        days: Size of the search window, counted back from now.
        http_timeout: Timeout in seconds applied to each HTTP request.

    Returns:
        The list of invoice dicts from the response, or an empty list when
        the response contains none.

    Raises:
        requests.HTTPError: If either endpoint answers with an error status.
    """
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    def to_iso(dt):
        # The timestamp format must include milliseconds.
        # NOTE(review): dt is naive local time but is labelled "Z" (UTC) —
        # confirm which timezone the API expects.
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # The token is sent unmodified. The original note says "do not strip the
    # L" — presumably a leading character of the token; confirm.
    headers = {"authorization": f"Bearer {token}"}

    # requests is blocking, so run it in a worker thread to keep the event
    # loop responsive.
    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=http_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    # Fail here rather than passing an error body on as if it were a JWT.
    res.raise_for_status()
    jwt_token = res.text.strip().strip('"')

    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=http_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    res.raise_for_status()

    payload = res.json()
    # NOTE(review): earlier revisions of this code read the list from either
    # "invoices" or "content"; accept both until the real key is confirmed.
    invoices = payload.get("invoices") or payload.get("content") or []
    print(f"拿到 {len(invoices)} 筆發票")
    return invoices
def save_invoices(invoices: list):
    """Store invoices as ``Transaction`` rows, skipping ones already saved.

    An invoice counts as already saved when a row whose ``note`` equals its
    ``invoiceNumber`` exists. All new rows are committed together, and the
    session is closed whether or not an error occurs.

    Args:
        invoices: Invoice dicts providing the keys ``invoiceNumber``,
            ``sellerName``, ``totalAmount`` and ``invoiceDate``.
    """
    session = SessionLocal()
    inserted = 0
    try:
        for invoice in invoices:
            duplicate = (
                session.query(Transaction)
                .filter(Transaction.note == invoice["invoiceNumber"])
                .first()
            )
            if duplicate is None:
                row_fields = {
                    "user_id": "auto_import",
                    "category": invoice["sellerName"],
                    "amount": invoice["totalAmount"],
                    "note": invoice["invoiceNumber"],
                    # fromisoformat() is fed "+00:00" in place of a "Z" suffix.
                    "created_at": datetime.fromisoformat(
                        invoice["invoiceDate"].replace("Z", "+00:00")
                    ),
                }
                session.add(Transaction(**row_fields))
                inserted += 1
        session.commit()
        skipped = len(invoices) - inserted
        print(f"✅ 新增 {inserted} 筆,略過 {skipped} 筆重複")
    finally:
        session.close()
async def main():
    """Log in, fetch recent invoices, print a summary and save them.

    Returns early, after printing a failure message, when no login token
    could be obtained.
    """
    print("開始抓取發票...")
    token = await login_and_get_token()
    if not token:
        print("登入失敗")
        return
    # Treat a None result as "no invoices" so len() and the loop below do
    # not raise TypeError.
    invoices = await fetch_invoices(token) or []
    print(f"拿到 {len(invoices)} 筆發票")
    for inv in invoices:
        print(f" {inv['invoiceDate'][:10]} {inv['sellerName']} ${inv['totalAmount']}")
    save_invoices(invoices)
# Script entry point: run the async main() to completion when executed directly.
if __name__ == "__main__":
    asyncio.run(main())