feat: linebot & invoice_fetcher

1. add manual captcha
2. remove transaction
This commit is contained in:
2026-03-09 15:35:49 +08:00
parent 1ea7feacf1
commit 1fa12fcfad
5 changed files with 317 additions and 153 deletions

View File

@@ -1,14 +1,27 @@
import asyncio
import base64
import os
import time
import asyncio
import requests
import anthropic
import urllib3
import threading
import captcha_state
from PIL import Image, ImageOps
import io
import cloudinary
import cloudinary.uploader
from datetime import datetime, timedelta
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker
from linebot.v3.messaging import (
Configuration, ApiClient, MessagingApi,
PushMessageRequest, TextMessage, ImageMessage
)
from urllib.parse import parse_qs
urllib3.disable_warnings()
load_dotenv("../.env")
@@ -16,6 +29,13 @@ load_dotenv("../.env")
# Credentials and API keys, all read from environment variables.
# Each value is None when the corresponding variable is not set.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# Value of LINE_USER_ID; presumably the LINE user that receives captcha prompts.
MY_USER_ID = os.getenv("LINE_USER_ID")
# Configure the Cloudinary SDK once at import time.
cloudinary.config(
    cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
    api_key=os.getenv("CLOUDINARY_API_KEY"),
    api_secret=os.getenv("CLOUDINARY_API_SECRET")
)
# 本地直接連 localhost
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
@@ -23,14 +43,14 @@ ANTHROPIC_API_KEY = os.getenv("ANTHROPIC_API_KEY")
# SessionLocal = sessionmaker(bind=engine)
# Declarative base that the ORM model below derives from.
Base = declarative_base()


class Transaction(Base):
    """ORM model mapped to the ``transactions`` table."""

    __tablename__ = "transactions"
    # Integer primary key, indexed.
    id = Column(Integer, primary_key=True, index=True)
    # NOTE(review): presumably the LINE user id of the owner - confirm against callers.
    user_id = Column(String)
    category = Column(String)
    amount = Column(Float)
    # Optional free-text note; the only column explicitly declared nullable.
    note = Column(String, nullable=True)
    # Defaults to the local (naive) time at which the row is created.
    created_at = Column(DateTime, default=datetime.now)
# class Transaction(Base):
# __tablename__ = "transactions"
# id = Column(Integer, primary_key=True, index=True)
# user_id = Column(String)
# category = Column(String)
# amount = Column(Float)
# note = Column(String, nullable=True)
# created_at = Column(DateTime, default=datetime.now)
def solve_captcha(img_b64: str) -> str:
client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
@@ -57,81 +77,150 @@ def solve_captcha(img_b64: str) -> str:
)
return msg.content[0].text.strip()
async def solve_captcha_manual(img_b64: str):
    """Ask a human to solve a captcha via LINE and return the answer.

    The captcha image is flattened onto a white background, uploaded to
    Cloudinary, and pushed to the LINE user together with a text prompt.
    The coroutine then waits (without blocking the event loop) until
    ``captcha_state.captcha_event`` is set, and returns
    ``captcha_state.captcha_answer``.

    Args:
        img_b64: Base64-encoded captcha image.

    Returns:
        Whatever value was stored in ``captcha_state.captcha_answer`` when
        the event fired (presumably the text typed by the user - the code
        that sets it lives outside this function).

    Raises:
        TimeoutError: If no answer arrives within 120 seconds.
    """
    # 1. Decode the image and composite it onto a white background, using
    #    the image's own alpha channel as the paste mask.
    img_data = base64.b64decode(img_b64)
    img = Image.open(io.BytesIO(img_data)).convert("RGBA")
    background = Image.new("RGBA", img.size, (255, 255, 255, 255))
    background.paste(img, mask=img.split()[3])
    white_img = background.convert("RGB")

    # Re-encode the flattened image as PNG and back to base64.
    buf = io.BytesIO()
    white_img.save(buf, format="PNG")
    white_b64 = base64.b64encode(buf.getvalue()).decode()

    # 2. Upload to Cloudinary. The SDK call is synchronous, so run it in a
    #    worker thread to keep the event loop responsive.
    upload_res = await asyncio.to_thread(
        cloudinary.uploader.upload,
        f"data:image/png;base64,{white_b64}",
        public_id=f"captcha_{int(time.time())}",
        overwrite=True,
        quality="auto:best",
        fetch_format="png",
    )
    image_url = upload_res["secure_url"]
    print(f"圖片 URL: {image_url}")

    # 3. Reset the shared state BEFORE sending the prompt. If it were reset
    #    after the push, a quick reply could signal the previous Event (or
    #    have its answer overwritten with None) and be lost.
    captcha_state.captcha_answer = None
    answer_event = threading.Event()
    captcha_state.captcha_event = answer_event

    # 4. Push the image and the prompt to LINE (synchronous SDK call).
    configuration = Configuration(access_token=os.getenv("LINE_CHANNEL_ACCESS_TOKEN"))
    with ApiClient(configuration) as api_client:
        line_bot_api = MessagingApi(api_client)
        push_request = PushMessageRequest(
            to=os.getenv("LINE_USER_ID"),
            messages=[
                ImageMessage(
                    original_content_url=image_url,
                    preview_image_url=image_url,
                ),
                TextMessage(text="請輸入驗證碼數字:"),
            ],
        )
        await asyncio.to_thread(line_bot_api.push_message, push_request)

    # 5. Wait for the reply. threading.Event.wait() blocks, so it runs in a
    #    worker thread; awaiting it directly would freeze every other task
    #    on this event loop for up to two minutes.
    triggered = await asyncio.to_thread(answer_event.wait, 120)
    if not triggered:
        # TimeoutError subclasses Exception, so existing handlers still match.
        raise TimeoutError("⏰ 驗證碼等待超時")
    return captcha_state.captcha_answer
async def login_and_get_token() -> str | None:
    """Log in to the e-invoice site and return the session token.

    Each attempt opens the login page in Chromium to obtain a
    ``login_challenge``, fetches a captcha, has it solved through LINE via
    ``solve_captcha_manual``, posts the credentials, follows the returned
    redirect in the browser, and reads ``token`` from sessionStorage or
    localStorage.

    Returns:
        The token string, or ``None`` when the login endpoint returns no
        ``redirectTo`` (no retry in that case) or when all attempts fail.
    """
    login_page_url = "https://www.einvoice.nat.gov.tw/accounts/login/mw"
    captcha_url = "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha"
    do_login_url = "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin"
    # Upper bound (seconds) for each HTTP call so a stalled server cannot
    # hang the coroutine forever.
    http_timeout = 30
    max_retry = 3

    for attempt in range(max_retry):
        print(f"登入嘗試第 {attempt + 1} 次...")
        try:
            async with async_playwright() as p:
                browser = await p.chromium.launch(headless=False)
                try:
                    # Load the login page to obtain the login_challenge.
                    page = await browser.new_page()
                    await page.goto(login_page_url)
                    await page.wait_for_timeout(8000)
                    url = page.url
                    print(f"目前 URL: {url}")

                    # The challenge is carried in the query string of the
                    # URL the page ends up on; None if it is absent.
                    fragment = url.split("?")[-1] if "?" in url else ""
                    params = parse_qs(fragment)
                    login_challenge = params.get("login_challenge", [None])[0]
                    print(f"login_challenge: {login_challenge}")

                    # Fetch the captcha. requests is synchronous, so run it
                    # in a worker thread instead of blocking the event loop.
                    # NOTE(review): verify=False disables TLS certificate
                    # checks - kept from the original, confirm it is needed.
                    res = await asyncio.to_thread(
                        requests.get,
                        captcha_url,
                        verify=False,
                        timeout=http_timeout,
                    )
                    captcha_data = res.json()
                    captcha_token = captcha_data["token"]

                    # Forward the captcha image to the LINE bot and wait
                    # for the user to type the answer.
                    captcha_text = await solve_captcha_manual(captcha_data["image"])
                    print(f"驗證碼: {captcha_text}")

                    # Submit the login.
                    res = await asyncio.to_thread(
                        requests.post,
                        do_login_url,
                        json={
                            "loginType": "U",
                            "userType": "MW",
                            "loginChallenge": login_challenge,
                            "captchaToken": captcha_token,
                            "captcha": captcha_text,
                            "customId": EINVOICE_USER,
                            "password": EINVOICE_PASS,
                        },
                        verify=False,
                        timeout=http_timeout,
                    )
                    data = res.json()
                    redirect_url = data.get("redirectTo")
                    print(f"redirectTo: {redirect_url}")

                    if not redirect_url:
                        # A missing redirect ends the whole login without a
                        # retry (original behavior, kept as-is).
                        print(f"登入失敗: {data}")
                        return None

                    # Follow the redirect so the site stores the token in
                    # browser storage; wait generously for its scripts.
                    await page.goto(redirect_url)
                    await page.wait_for_load_state("domcontentloaded")
                    await page.wait_for_timeout(8000)

                    url = page.url
                    print(f"redirect 後 URL: {url}")

                    # Dump localStorage and sessionStorage keys for debugging.
                    local_keys = await page.evaluate("Object.keys(localStorage)")
                    session_keys = await page.evaluate("Object.keys(sessionStorage)")
                    print("localStorage keys:", local_keys)
                    print("sessionStorage keys:", session_keys)
                    await page.wait_for_timeout(3000)
                    for key in session_keys:
                        # Pass the key as an argument rather than splicing
                        # it into JS source, so quotes in a key cannot
                        # break (or inject into) the evaluated script.
                        val = await page.evaluate(
                            "key => sessionStorage.getItem(key)", key
                        )
                        print(f" session {key}: {val[:80] if val else None}")

                    token = await page.evaluate(
                        "sessionStorage.getItem('token') || localStorage.getItem('token')"
                    )
                    print(f"token: {token[:30] if token else 'None'}")
                finally:
                    # Close the browser on every path: success, early
                    # return, or exception.
                    await browser.close()

            if token:
                return token
            print(f"⚠️ 第 {attempt + 1} 次登入失敗,重試...")
        except Exception as e:
            # Top-level boundary of one attempt: report and try again.
            print(f"❌ 第 {attempt + 1} 次發生錯誤: {e}")

    print("❌ 登入失敗超過最大重試次數")
    return None
async def fetch_invoices(token: str, days: int = 7) -> list:
print(f"🔍 開始抓發票token: {token[:20]}")
end_date = datetime.now()
start_date = end_date - timedelta(days=days)