# Mirror of https://github.com/henry4682/linebot_finance.git
# Synced 2026-05-16 04:41:52 +00:00
# 224 lines, 7.6 KiB, Python
import asyncio
|
||
import base64
|
||
import os
|
||
import requests
|
||
import anthropic
|
||
import urllib3
|
||
from datetime import datetime, timedelta
|
||
from dotenv import load_dotenv
|
||
from playwright.async_api import async_playwright
|
||
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
|
||
from sqlalchemy.orm import declarative_base, sessionmaker
|
||
|
||
# Silence only the InsecureRequestWarning raised by the verify=False requests
# in this file, instead of every urllib3 warning category.
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

# Keep the original working-directory-relative lookup, but fall back to a path
# anchored at this script so the .env file is still found when the script is
# started from another directory.
_env_path = "../.env"
if not os.path.exists(_env_path):
    _env_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "..", ".env"
    )
load_dotenv(_env_path)

# Credentials read from the environment; each is None when the variable is
# not set.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")

# Direct connection to the local (localhost) database -- currently disabled.
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# engine = create_engine(DATABASE_URL)
# SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
|
||
|
||
class Transaction(Base):
    """ORM model mapped to the ``transactions`` table."""

    __tablename__ = "transactions"

    # Integer primary key (also declared with index=True).
    id = Column(Integer, primary_key=True, index=True)
    # The disabled import code in save_invoices() would fill these three
    # columns with "auto_import", the seller name and the total amount.
    user_id = Column(String)
    category = Column(String)
    amount = Column(Float)
    # Explicitly nullable; the disabled import code would store the invoice
    # number here and use it for duplicate detection.
    note = Column(String, nullable=True)
    # datetime.now is passed as a callable, so it is evaluated per row and
    # yields a naive local timestamp.
    created_at = Column(DateTime, default=datetime.now)
|
||
|
||
def solve_captcha(img_b64: str) -> str:
    """Ask the Anthropic vision model to read a 5-digit captcha image.

    Args:
        img_b64: Base64-encoded image data; it is sent to the API with the
            media type ``image/png``.

    Returns:
        The ASCII digits contained in the model's reply. If the reply holds
        no ASCII digit at all, the stripped reply is returned unchanged so
        the caller still sees what the model answered.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    image_block = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": img_b64,
        },
    }
    # Prompt (Chinese): the image is a captcha of exactly 5 digits; reply with
    # those 5 digits and nothing else.
    prompt_block = {
        "type": "text",
        "text": "這是驗證碼圖片,只有5個數字,只回傳這5個數字,不要其他任何文字",
    }

    msg = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{"role": "user", "content": [image_block, prompt_block]}],
    )

    reply = msg.content[0].text.strip()
    # The model occasionally wraps the answer in spaces or punctuation; keep
    # only plain ASCII digits so the submitted captcha is not polluted.
    digits = "".join(ch for ch in reply if ch in "0123456789")
    return digits or reply
|
||
|
||
async def login_and_get_token() -> str | None:
    """Log in to the e-invoice platform and return the stored session token.

    The flow is:
      1. Open the login page in Chromium and read ``login_challenge`` from
         the resulting URL.
      2. Fetch a captcha from the login API and solve it via
         ``solve_captcha``.
      3. POST the credentials, captcha and challenge to ``doLogin``.
      4. Navigate the browser to the returned ``redirectTo`` URL and read
         ``token`` from sessionStorage (falling back to localStorage).

    Returns:
        The token string, or ``None`` when the login response has no
        ``redirectTo`` or no ``token`` entry exists in browser storage.

    Raises:
        Whatever Playwright, ``requests`` or ``solve_captcha`` raise; the
        browser is closed in every case.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a dead server

    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=False)
        try:
            # Load the login page to obtain login_challenge.
            page = await browser.new_page()
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)
            url = page.url
            print(f"目前 URL: {url}")

            login_challenge = _extract_login_challenge(url)
            print(f"login_challenge: {login_challenge}")

            # Fetch the captcha. The blocking HTTP call and the blocking
            # captcha solver run in worker threads so the event loop that
            # drives Playwright is not stalled.
            # NOTE: verify=False disables TLS certificate verification.
            res = await asyncio.to_thread(
                requests.get,
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=request_timeout,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = await asyncio.to_thread(
                solve_captcha, captcha_data["image"]
            )
            print(f"驗證碼: {captcha_text}")

            # Log in.
            res = await asyncio.to_thread(
                requests.post,
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=request_timeout,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")

            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the page can store the token in browser
            # storage; the fixed wait gives its scripts time to run.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"redirect 後 URL: {url}")

            # Debug output: list the keys of both localStorage and
            # sessionStorage, then dump truncated sessionStorage values.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)
            await page.wait_for_timeout(3000)
            for key in session_keys:
                # Pass the key as an argument instead of splicing it into the
                # JavaScript source, so keys containing quotes cannot break
                # (or inject into) the evaluated expression.
                val = await page.evaluate("k => sessionStorage.getItem(k)", key)
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            # Close the browser on success, on login failure and on errors.
            await browser.close()


def _extract_login_challenge(url: str) -> str | None:
    """Return the ``login_challenge`` query parameter of *url*, or ``None``."""
    from urllib.parse import parse_qs, urlsplit

    parts = urlsplit(url)
    # Look in the regular query string first, then in a query string that may
    # be embedded in the fragment (e.g. ``#/route?login_challenge=...``).
    candidates = [parts.query]
    if "?" in parts.fragment:
        candidates.append(parts.fragment.split("?", 1)[1])
    for query in candidates:
        values = parse_qs(query).get("login_challenge")
        if values:
            return values[0]
    return None
|
||
|
||
async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch the carrier invoices issued within the last *days* days.

    Two requests are made: the first exchanges the search criteria for a
    JWT, the second posts that JWT to retrieve the invoice list.

    Args:
        token: Bearer token obtained from the login step; sent unchanged in
            the ``authorization`` header.
        days: Size of the search window, counted back from now.

    Returns:
        The value stored under ``"content"`` in the search response. An
        empty list is returned when either request answers with a non-OK
        status, when the payload is not a JSON object, or when ``content``
        is missing or null.
    """
    request_timeout = 30  # seconds; avoids hanging forever on a dead server

    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    def to_iso(dt):
        # The API expects millisecond precision.
        # NOTE(review): dt is naive local time but is labelled with "Z"
        # (UTC); left as-is because the API's expectation is unconfirmed.
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # The token is sent exactly as received (original note: do not strip
    # the "L").
    headers = {"authorization": f"Bearer {token}"}

    # Blocking HTTP calls run in a worker thread so this coroutine does not
    # stall the event loop.
    # NOTE: verify=False disables TLS certificate verification.
    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=request_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    if not res.ok:
        # Do not forward an error body as if it were a JWT.
        return []
    # The JWT comes back as a bare (possibly JSON-quoted) string.
    jwt_token = res.text.strip().strip('"')

    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=request_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    if not res.ok:
        return []

    data = res.json()
    # Tolerate a non-object payload and an explicit ``"content": null``.
    invoice_list = (data.get("content") or []) if isinstance(data, dict) else []
    print(f"拿到 {len(invoice_list)} 筆發票")

    # The list must be returned; otherwise main() would receive None and fail.
    return invoice_list
|
||
|
||
def save_invoices(invoices: list):
    """Print a dry-run summary of the invoices that would be stored.

    Database persistence is currently disabled: the SQLAlchemy statements
    are kept as comments and only the formatted report is printed.

    Missing *and* null fields fall back to placeholder values, so a single
    invoice with ``None`` in a field no longer aborts the whole run.

    Args:
        invoices: Invoice dicts; the keys ``invoiceDate``, ``sellerName``,
            ``totalAmount`` and ``invoiceNumber`` are read from each.

    Returns:
        None. Any exception raised while processing is caught, reported on
        stdout together with the offending invoice, and processing stops.
    """
    # db = SessionLocal()
    saved = 0
    inv = None  # last invoice touched, reported if processing fails
    try:
        for inv in invoices:
            # str() guards the slices below against non-string values.
            inv_date = str(_invoice_field(inv, "invoiceDate", "未知日期"))
            seller = str(_invoice_field(inv, "sellerName", "未知店家"))
            amount = _invoice_field(inv, "totalAmount", 0)
            inv_num = _invoice_field(inv, "invoiceNumber", "無號碼")
            # existing = db.query(Transaction).filter(
            #     Transaction.note == inv["invoiceNumber"]
            # ).first()
            # if existing:
            #     continue
            # db.add(Transaction(
            #     user_id="auto_import",
            #     category=inv["sellerName"],
            #     amount=inv["totalAmount"],
            #     note=inv["invoiceNumber"],
            #     created_at=datetime.fromisoformat(
            #         inv["invoiceDate"].replace("Z", "+00:00")
            #     )
            # ))
            # Aligned, human-readable report line.
            print(f"新增發票 | 日期: {inv_date[:10]} | 店家: {seller[:15]:<15} | 金額: {amount:>6} | 號碼: {inv_num}")
            saved += 1
        # db.commit()
        print("-" * 30)
        print(f"✅ 模擬處理完成:預計新增 {saved} 筆,總計來源 {len(invoices)} 筆")
    except Exception as e:
        # Top-level boundary: report the failure instead of crashing.
        print("❌ 儲存發票失敗:", e)
        if inv is not None:
            print(f"錯誤發票內容: {inv}")
        # db.rollback()
    # finally:
    #     db.close()


def _invoice_field(inv, key, default):
    """Return ``inv[key]``, or *default* when the key is missing or None."""
    value = inv.get(key)
    return default if value is None else value
|
||
|
||
async def main():
    """Run the full import: log in, fetch the invoices, then report them."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if token:
        # Fetch with the default search window and hand the result over.
        save_invoices(await fetch_invoices(token))
    else:
        # No token means the login step failed; nothing else to do.
        print("登入失敗")
|
||
|
||
if __name__ == "__main__":
    # Script entry point: drive the async workflow to completion.
    asyncio.run(main())