Files
linebot_finance/app/invoice_fetcher.py

316 lines
11 KiB
Python
Executable File
Raw Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
import asyncio
import base64
import os
import time
import asyncio
import requests
from groq import Groq
import urllib3
import threading
import captcha_state
from PIL import Image, ImageOps
import io
import cloudinary
import cloudinary.uploader
from datetime import datetime, timedelta
from dotenv import load_dotenv
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker
from linebot.v3.messaging import (
Configuration, ApiClient, MessagingApi,
PushMessageRequest, TextMessage, ImageMessage
)
from urllib.parse import parse_qs
# Silence urllib3 warnings process-wide. The HTTP calls in this file pass
# verify=False, which would otherwise emit a warning on every request.
urllib3.disable_warnings()
# Load environment variables from the relative path "../.env".
# NOTE(review): a relative path depends on the working directory the script
# is started from - confirm this matches how the script is launched.
load_dotenv("../.env")
# Credentials for the e-invoice platform login.
EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
# API key used by solve_captcha() to create the Groq client.
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
# LINE user id read from LINE_USER_ID.
MY_USER_ID = os.getenv("LINE_USER_ID")
# Configure the Cloudinary SDK used by solve_captcha_manual() to host captcha images.
cloudinary.config(
cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
api_key=os.getenv("CLOUDINARY_API_KEY"),
api_secret=os.getenv("CLOUDINARY_API_SECRET")
)
# Local development: connect directly to the database on localhost.
# (Currently disabled; save_invoices() only prints what it would store.)
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# engine = create_engine(DATABASE_URL)
# SessionLocal = sessionmaker(bind=engine)
# Declarative base for ORM models; the only model below is commented out.
Base = declarative_base()
# class Transaction(Base):
# __tablename__ = "transactions"
# id = Column(Integer, primary_key=True, index=True)
# user_id = Column(String)
# category = Column(String)
# amount = Column(Float)
# note = Column(String, nullable=True)
# created_at = Column(DateTime, default=datetime.now)
def solve_captcha(img_b64: str) -> str:
    """Ask a Groq-hosted vision model to read the digits in a captcha image.

    This is a synchronous, blocking call; from a coroutine run it in a worker
    thread (for example with ``asyncio.to_thread``) instead of awaiting it.

    Args:
        img_b64: Base64-encoded image data. It is embedded in a
            ``data:image/png;base64,...`` URL, so PNG data is assumed.

    Returns:
        The ASCII digits found in the model's reply, in their original order.
        An empty string is returned when the reply is empty or has no digits.
    """
    client = Groq(api_key=GROQ_API_KEY)
    completion = client.chat.completions.create(
        model="llama-3.2-11b-vision-preview",
        max_tokens=10,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{img_b64}"
                    },
                },
                {
                    "type": "text",
                    "text": "這是驗證碼圖片只有5個數字只回傳這5個數字不要其他任何文字",
                },
            ],
        }],
    )
    # The reply can be None or carry stray text/punctuation despite the
    # prompt; keep only ASCII digits so the caller never submits noise.
    reply = completion.choices[0].message.content or ""
    return "".join(ch for ch in reply if ch in "0123456789")
async def solve_captcha_manual(img_b64: str):
    """Send the captcha image to the LINE user and wait for a typed answer.

    The image is flattened onto a white background, uploaded to Cloudinary,
    and pushed to the LINE user together with a text prompt. The coroutine
    then waits on ``captcha_state.captcha_event``.
    NOTE(review): presumably the LINE webhook handler stores the reply in
    ``captcha_state.captcha_answer`` and sets the event - confirm there.

    Args:
        img_b64: Base64-encoded captcha image.

    Returns:
        Whatever value was stored in ``captcha_state.captcha_answer`` by the
        time the event fired.

    Raises:
        TimeoutError: If no answer arrives within 120 seconds.
    """
    # 1. Decode the image and composite it onto an opaque white background,
    #    using the alpha channel as the paste mask.
    raw = base64.b64decode(img_b64)
    img = Image.open(io.BytesIO(raw)).convert("RGBA")
    background = Image.new("RGBA", img.size, (255, 255, 255, 255))
    background.paste(img, mask=img.split()[3])
    white_img = background.convert("RGB")

    buf = io.BytesIO()
    white_img.save(buf, format="PNG")
    white_b64 = base64.b64encode(buf.getvalue()).decode()

    # 2. Upload to Cloudinary so LINE can fetch the image by URL.
    upload_res = cloudinary.uploader.upload(
        f"data:image/png;base64,{white_b64}",
        public_id=f"captcha_{int(time.time())}",
        overwrite=True,
        quality="auto:best",
        fetch_format="png",
    )
    image_url = upload_res["secure_url"]
    print(f"圖片 URL: {image_url}")

    # 3. Arm the shared state BEFORE the push: a reply that arrives right
    #    after the message is sent must land on this event and must not be
    #    wiped by a later reset.
    captcha_state.captcha_answer = None
    reply_event = threading.Event()
    captcha_state.captcha_event = reply_event

    # 4. Push the image and the prompt to the LINE user.
    configuration = Configuration(access_token=os.getenv("LINE_CHANNEL_ACCESS_TOKEN"))
    with ApiClient(configuration) as api_client:
        line_bot_api = MessagingApi(api_client)
        line_bot_api.push_message(PushMessageRequest(
            to=os.getenv("LINE_USER_ID"),
            messages=[
                ImageMessage(
                    original_content_url=image_url,
                    preview_image_url=image_url,
                ),
                TextMessage(text="請輸入驗證碼數字:"),
            ],
        ))

    # 5. Wait in a worker thread; calling Event.wait() directly here would
    #    freeze the event loop for up to two minutes.
    triggered = await asyncio.to_thread(reply_event.wait, 120)
    if not triggered:
        raise TimeoutError("⏰ 驗證碼等待超時")
    return captcha_state.captcha_answer
async def login_and_get_token() -> str | None:
    """Log in to the e-invoice platform and return the session token.

    Each attempt opens the login page in headless Firefox to obtain the
    ``login_challenge`` query parameter, fetches a captcha, solves it with
    ``solve_captcha``, posts the credentials, follows the returned redirect,
    and reads ``token`` from sessionStorage (falling back to localStorage).

    Up to three attempts are made. An attempt that raises, or that ends
    without a token in browser storage, is retried.

    Returns:
        The token string, or ``None`` when the login response carries no
        ``redirectTo`` or when all attempts are exhausted.
    """
    max_retry = 3
    request_timeout = 30  # seconds; keeps a stalled HTTP call from hanging an attempt forever

    for attempt in range(max_retry):
        print(f"登入嘗試第 {attempt + 1} 次...")
        try:
            async with async_playwright() as p:
                browser = await p.firefox.launch(headless=True)
                try:
                    # Load the login page to obtain login_challenge.
                    page = await browser.new_page()
                    await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
                    await page.wait_for_timeout(8000)
                    url = page.url
                    print(f"目前 URL: {url}")
                    query = url.split("?")[-1] if "?" in url else ""
                    params = parse_qs(query)
                    login_challenge = params.get("login_challenge", [None])[0]
                    print(f"login_challenge: {login_challenge}")

                    # Fetch the captcha. requests is blocking, so it runs in a
                    # worker thread to keep the event loop responsive.
                    # NOTE(security): verify=False disables TLS certificate
                    # verification for these calls.
                    res = await asyncio.to_thread(
                        requests.get,
                        "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                        verify=False,
                        timeout=request_timeout,
                    )
                    captcha_data = res.json()
                    captcha_token = captcha_data["token"]

                    # solve_captcha is a plain (sync) function: awaiting its
                    # str result raises TypeError, so run it in a thread.
                    # solve_captcha_manual(captcha_data["image"]) is the
                    # human-in-the-loop alternative via LINE.
                    captcha_text = await asyncio.to_thread(
                        solve_captcha, captcha_data["image"]
                    )
                    print(f"驗證碼: {captcha_text}")

                    # Submit the credentials.
                    res = await asyncio.to_thread(
                        requests.post,
                        "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                        json={
                            "loginType": "U",
                            "userType": "MW",
                            "loginChallenge": login_challenge,
                            "captchaToken": captcha_token,
                            "captcha": captcha_text,
                            "customId": EINVOICE_USER,
                            "password": EINVOICE_PASS,
                        },
                        verify=False,
                        timeout=request_timeout,
                    )
                    data = res.json()
                    redirect_url = data.get("redirectTo")
                    print(f"redirectTo: {redirect_url}")
                    if not redirect_url:
                        # NOTE(review): a rejected login stops immediately
                        # rather than retrying (unchanged behaviour). A wrong
                        # captcha also ends here - decide whether to retry.
                        print(f"登入失敗: {data}")
                        return None

                    # Follow the redirect so the site stores the token in
                    # browser storage.
                    await page.goto(redirect_url)
                    await page.wait_for_load_state("domcontentloaded")
                    await page.wait_for_timeout(8000)
                    url = page.url
                    print(f"redirect 後 URL: {url}")

                    # Debug output: list the keys in both storages.
                    local_keys = await page.evaluate("Object.keys(localStorage)")
                    session_keys = await page.evaluate("Object.keys(sessionStorage)")
                    print("localStorage keys:", local_keys)
                    print("sessionStorage keys:", session_keys)
                    await page.wait_for_timeout(3000)
                    for key in session_keys:
                        # Pass the key as an argument instead of splicing it
                        # into JS source, so a quote in a key cannot break
                        # the expression.
                        val = await page.evaluate(
                            "key => sessionStorage.getItem(key)", key
                        )
                        print(f" session {key}: {val[:80] if val else None}")

                    token = await page.evaluate(
                        "sessionStorage.getItem('token') || localStorage.getItem('token')"
                    )
                    print(f"token: {token[:30] if token else 'None'}")
                finally:
                    # Close the browser on every exit path, including errors
                    # and the early return above.
                    await browser.close()

            if token:
                return token
            print(f"⚠️ 第 {attempt + 1} 次登入失敗,重試...")
        except Exception as e:
            # Top-level retry boundary: report the failure and try again.
            print(f"❌ 第 {attempt + 1} 次發生錯誤: {e}")

    print("❌ 登入失敗超過最大重試次數")
    return None
async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch carrier invoices from the last ``days`` days.

    Two POST requests are made: the first exchanges the search criteria for
    a JWT, the second submits that JWT to obtain the invoice list.

    Args:
        token: Bearer token returned by ``login_and_get_token``.
        days: Size of the look-back window ending now, in days.

    Returns:
        The list under the ``content`` key of the search response. An empty
        list is returned when either request fails with a non-2xx status or
        when ``content`` is missing or null.
    """
    # Function-scope import: the file-level import only brings in
    # `datetime` and `timedelta`.
    from datetime import timezone

    print(f"🔍 開始抓發票token: {token[:20]}")

    # The timestamps carry a "Z" (UTC) suffix, so they must be built from
    # UTC time; naive local time would shift the window by the UTC offset.
    end_date = datetime.now(timezone.utc)
    start_date = end_date - timedelta(days=days)

    def to_iso(dt):
        """Format ``dt`` as an ISO-8601 string with milliseconds and a Z suffix."""
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    request_timeout = 30  # seconds
    # The token is sent unmodified. The original note read "do not strip the L";
    # NOTE(review): its exact meaning is unclear - confirm with the author.
    headers = {"authorization": f"Bearer {token}"}

    # requests is blocking; run it in a worker thread so the event loop
    # is not stalled. NOTE(security): verify=False disables TLS verification.
    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=request_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    if not res.ok:
        # An error body is not a usable JWT; skip the second request.
        print(f"❌ 取得查詢 JWT 失敗: HTTP {res.status_code}")
        return []
    # The body is the JWT itself, possibly wrapped in JSON string quotes.
    jwt_token = res.text.strip().strip('"')

    res = await asyncio.to_thread(
        requests.post,
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=request_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    if not res.ok:
        print(f"❌ 查詢發票失敗: HTTP {res.status_code}")
        return []

    data = res.json()
    # `or []` also covers an explicit null under "content".
    invoice_list = data.get("content") or []
    print(f"拿到 {len(invoice_list)} 筆發票")
    return invoice_list
def save_invoices(invoices: list):
    """Print a one-line summary per invoice (dry run; nothing is persisted).

    Database persistence is currently disabled, so this only reports what
    would be stored.
    TODO: when the DB layer is re-enabled, skip invoices whose number already
    exists in ``Transaction.note`` and insert the rest (category = seller
    name, amount = total amount, created_at = invoice date), with
    commit/rollback/close around the batch.

    Missing or null fields fall back to placeholder values. A malformed
    entry is reported and skipped without aborting the remaining invoices.

    Args:
        invoices: Invoice dicts; the keys read are ``invoiceDate``,
            ``sellerName``, ``totalAmount`` and ``invoiceNumber``.
            ``None`` is treated as an empty list.
    """
    records = invoices or []
    saved = 0
    for inv in records:
        try:
            # `.get(key, default)` does not cover keys present with a null
            # value, so fall back with `or` and coerce to str before slicing.
            inv_date = str(inv.get("invoiceDate") or "未知日期")
            seller = str(inv.get("sellerName") or "未知店家")
            amount = inv.get("totalAmount")
            if amount is None:
                amount = 0
            inv_num = inv.get("invoiceNumber") or "無號碼"
            # `!s` keeps the width spec valid for any amount type.
            print(f"新增發票 | 日期: {inv_date[:10]} | 店家: {seller[:15]:<15} | 金額: {amount!s:>6} | 號碼: {inv_num}")
        except (AttributeError, TypeError, ValueError) as e:
            print("❌ 儲存發票失敗:", e)
            print(f"錯誤發票內容: {inv}")
            continue
        saved += 1
    print("-" * 30)
    print(f"✅ 模擬處理完成:預計新增 {saved} 筆,總計來源 {len(records)}")
async def main():
    """Script entry point: log in, fetch recent invoices, and report them.

    Prints a failure message and stops when no login token is obtained.
    """
    print("開始抓取發票...")
    token = await login_and_get_token()
    if token:
        # fetch_invoices is called with its default look-back window.
        fetched = await fetch_invoices(token)
        save_invoices(fetched)
    else:
        print("登入失敗")


# Run the workflow only when the file is executed as a script.
if __name__ == "__main__":
    asyncio.run(main())