"""Log in to the Taiwan e-invoice portal and list recent carrier invoices.

Flow: open the login page in a headless browser to obtain a
``login_challenge``, fetch and solve a captcha, post the credentials, follow
the redirect so the site stores its token in web storage, then query the
invoice search API with that token and print the result.
"""

import asyncio
import base64
import io
import os
import re
import threading
import time
from datetime import datetime, timedelta
from urllib.parse import parse_qs

import cloudinary
import cloudinary.uploader
import requests
import urllib3
from dotenv import load_dotenv
from groq import Groq
from linebot.v3.messaging import (
    Configuration,
    ApiClient,
    MessagingApi,
    PushMessageRequest,
    TextMessage,
    ImageMessage
)
from PIL import Image, ImageOps
from playwright.async_api import async_playwright
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
from sqlalchemy.orm import declarative_base, sessionmaker

import captcha_state

# NOTE(review): every HTTP call below uses verify=False, so TLS certificates
# are not validated while credentials are being sent. This call only silences
# the resulting urllib3 warnings; consider pinning the proper CA bundle.
urllib3.disable_warnings()

load_dotenv("../.env")

EINVOICE_USER = os.getenv("EINVOICE_USER")
EINVOICE_PASS = os.getenv("EINVOICE_PASS")
GROQ_API_KEY = os.getenv("GROQ_API_KEY")
MY_USER_ID = os.getenv("LINE_USER_ID")

# Seconds to wait for any single HTTP request before giving up.
REQUEST_TIMEOUT = 30

cloudinary.config(
    cloud_name=os.getenv("CLOUDINARY_CLOUD_NAME"),
    api_key=os.getenv("CLOUDINARY_API_KEY"),
    api_secret=os.getenv("CLOUDINARY_API_SECRET")
)

# Local run: connect straight to localhost
# DATABASE_URL = os.getenv("LOCAL_DATABASE_URL")
# engine = create_engine(DATABASE_URL)
# SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()

# class Transaction(Base):
#     __tablename__ = "transactions"
#     id = Column(Integer, primary_key=True, index=True)
#     user_id = Column(String)
#     category = Column(String)
#     amount = Column(Float)
#     note = Column(String, nullable=True)
#     created_at = Column(DateTime, default=datetime.now)


def _extract_captcha_digits(raw: str | None) -> str:
    """Return only the ASCII digits of *raw*, or the stripped text if none."""
    cleaned = (raw or "").strip()
    digits = re.sub(r"[^0-9]", "", cleaned)
    return digits or cleaned


def solve_captcha(img_b64: str) -> str:
    """Ask the Groq vision model to read a captcha image.

    This is a blocking (synchronous) call; coroutines should run it through
    ``asyncio.to_thread`` rather than awaiting it directly.

    Args:
        img_b64: Base64-encoded PNG data of the captcha image.

    Returns:
        The digits found in the model's reply. If the reply contains no
        digits at all, the stripped reply text is returned unchanged.
    """
    # client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)
    # Switched to Groq
    client = Groq(api_key=GROQ_API_KEY)
    msg = client.chat.completions.create(
        model="llama-3.2-11b-vision-preview",
        max_tokens=10,
        messages=[{
            "role": "user",
            "content": [
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/png;base64,{img_b64}"
                    }
                },
                {
                    "type": "text",
                    "text": "這是驗證碼圖片,只有5個數字,只回傳這5個數字,不要其他任何文字"
                }
            ]
        }]
    )
    # The model may wrap the answer in punctuation or extra words, and the
    # content can be None; keep just the digits so the login form gets a
    # clean value.
    return _extract_captcha_digits(msg.choices[0].message.content)


async def solve_captcha_manual(img_b64: str):
    """Send the captcha to LINE and wait for a human to type the answer.

    The image is flattened onto a white background, uploaded to Cloudinary,
    and pushed to the configured LINE user together with a prompt. The
    coroutine then waits up to 120 seconds for ``captcha_state.captcha_event``
    to be set (presumably by the LINE webhook handler elsewhere in the
    project -- confirm against ``captcha_state`` usage).

    Args:
        img_b64: Base64-encoded image data of the captcha.

    Returns:
        Whatever value is stored in ``captcha_state.captcha_answer`` once the
        event fires.

    Raises:
        TimeoutError: If no answer arrives within 120 seconds.
    """
    # Decode the image and composite it onto a white background, using the
    # alpha channel as the paste mask.
    img_data = base64.b64decode(img_b64)
    img = Image.open(io.BytesIO(img_data)).convert("RGBA")
    background = Image.new("RGBA", img.size, (255, 255, 255, 255))
    background.paste(img, mask=img.split()[3])
    white_img = background.convert("RGB")

    # Re-encode as PNG and back to base64 for the data-URI upload.
    buf = io.BytesIO()
    white_img.save(buf, format="PNG")
    white_b64 = base64.b64encode(buf.getvalue()).decode()

    # Upload the image to Cloudinary so LINE can fetch it by URL.
    upload_res = cloudinary.uploader.upload(
        f"data:image/png;base64,{white_b64}",
        public_id=f"captcha_{int(time.time())}",
        overwrite=True,
        quality="auto:best",
        fetch_format="png",
    )
    image_url = upload_res["secure_url"]
    print(f"圖片 URL: {image_url}")

    # Reset the shared state BEFORE pushing the message, so a reply that
    # arrives immediately cannot be signalled on a stale event and lost.
    answer_event = threading.Event()
    captcha_state.captcha_answer = None
    captcha_state.captcha_event = answer_event

    # Push the image and the prompt to LINE.
    configuration = Configuration(access_token=os.getenv("LINE_CHANNEL_ACCESS_TOKEN"))
    with ApiClient(configuration) as api_client:
        line_bot_api = MessagingApi(api_client)
        line_bot_api.push_message(PushMessageRequest(
            to=MY_USER_ID,
            messages=[
                ImageMessage(
                    original_content_url=image_url,
                    preview_image_url=image_url
                ),
                TextMessage(text="請輸入驗證碼數字:")
            ]
        ))

    # Wait for the reply in a worker thread: threading.Event.wait() blocks,
    # and calling it directly here would freeze the asyncio event loop.
    triggered = await asyncio.to_thread(answer_event.wait, 120)
    if not triggered:
        raise TimeoutError("⏰ 驗證碼等待超時")

    return captcha_state.captcha_answer


async def login_and_get_token() -> str | None:
    """Log in to the e-invoice portal and return the session token.

    Makes up to three attempts. An attempt is retried when it raises or when
    no token is found in web storage after the redirect. If the login API
    answers without a ``redirectTo`` value, the function returns ``None``
    immediately without retrying.

    Returns:
        The value of the ``token`` entry from sessionStorage (or, failing
        that, localStorage), or ``None`` if login did not succeed.
    """
    max_retry = 3

    for attempt in range(max_retry):
        print(f"登入嘗試第 {attempt + 1} 次...")

        try:
            async with async_playwright() as p:
                browser = await p.firefox.launch(headless=True)
                try:
                    # Load the login page to obtain login_challenge.
                    page = await browser.new_page()
                    await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
                    await page.wait_for_timeout(8000)

                    url = page.url
                    print(f"目前 URL: {url}")

                    # Everything after the last "?" is parsed as the query.
                    fragment = url.split("?")[-1] if "?" in url else ""
                    params = parse_qs(fragment)
                    login_challenge = params.get("login_challenge", [None])[0]
                    print(f"login_challenge: {login_challenge}")

                    # Fetch the captcha image and its token.
                    res = requests.get(
                        "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                        verify=False,
                        timeout=REQUEST_TIMEOUT,
                    )
                    res.raise_for_status()
                    captcha_data = res.json()
                    captcha_token = captcha_data["token"]

                    # Alternative: have a human solve it through the LINE bot.
                    # captcha_text = await solve_captcha_manual(captcha_data["image"])
                    # solve_captcha is a blocking function that returns a
                    # str, so it must run in a worker thread; awaiting its
                    # result directly raises TypeError.
                    captcha_text = await asyncio.to_thread(
                        solve_captcha, captcha_data["image"]
                    )
                    print(f"驗證碼: {captcha_text}")

                    # Log in.
                    res = requests.post(
                        "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                        json={
                            "loginType": "U",
                            "userType": "MW",
                            "loginChallenge": login_challenge,
                            "captchaToken": captcha_token,
                            "captcha": captcha_text,
                            "customId": EINVOICE_USER,
                            "password": EINVOICE_PASS,
                        },
                        verify=False,
                        timeout=REQUEST_TIMEOUT,
                    )
                    data = res.json()
                    redirect_url = data.get("redirectTo")
                    print(f"redirectTo: {redirect_url}")

                    if not redirect_url:
                        # A rejected login is not retried.
                        print(f"登入失敗: {data}")
                        return None

                    # Follow the redirect so the site stores the token in
                    # web storage; wait generously for its scripts to run.
                    await page.goto(redirect_url)
                    await page.wait_for_load_state("domcontentloaded")
                    await page.wait_for_timeout(8000)

                    url = page.url
                    print(f"redirect 後 URL: {url}")

                    # Dump the keys of both localStorage and sessionStorage.
                    local_keys = await page.evaluate("Object.keys(localStorage)")
                    session_keys = await page.evaluate("Object.keys(sessionStorage)")
                    print("localStorage keys:", local_keys)
                    print("sessionStorage keys:", session_keys)
                    await page.wait_for_timeout(3000)

                    for key in session_keys:
                        # Pass the key as an argument instead of splicing it
                        # into the script, so quotes in a key cannot break it.
                        val = await page.evaluate(
                            "key => sessionStorage.getItem(key)", key
                        )
                        print(f"  session {key}: {val[:80] if val else None}")

                    token = await page.evaluate(
                        "sessionStorage.getItem('token') || localStorage.getItem('token')"
                    )
                    print(f"token: {token[:30] if token else 'None'}")
                finally:
                    # Close the browser on every path, including errors.
                    await browser.close()

            if token:
                return token
            print(f"⚠️ 第 {attempt + 1} 次登入失敗,重試...")

        except Exception as e:
            # Top-level boundary for one attempt: report and try again.
            print(f"❌ 第 {attempt + 1} 次發生錯誤: {e}")

    print("❌ 登入失敗超過最大重試次數")
    return None


def _to_iso_millis(dt: datetime) -> str:
    """Format *dt* as ``YYYY-MM-DDTHH:MM:SS.mmmZ`` (millisecond precision).

    NOTE(review): the trailing "Z" denotes UTC, but callers pass a naive
    ``datetime.now()`` (local time) -- confirm which one the API expects.
    """
    return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"


async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch the carrier invoices of the last *days* days.

    Two requests are made: the first exchanges the search criteria for a
    JWT, the second uses that JWT to retrieve the invoices.

    Args:
        token: Bearer token obtained from ``login_and_get_token``.
        days: How many days back from now the search window starts.

    Returns:
        The list under the ``content`` key of the search response, or an
        empty list when that key is missing or null.

    Raises:
        requests.RequestException: On a network failure, timeout, or a
            non-success HTTP status from either request.
        ValueError: If the search response body is not valid JSON.
    """
    print(f"🔍 開始抓發票,token: {token[:20]}")
    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # The token is sent as-is (original note: "do not strip the L").
    headers = {"authorization": f"Bearer {token}"}

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            # The timestamps must include milliseconds.
            "searchStartDate": _to_iso_millis(start_date),
            "searchEndDate": _to_iso_millis(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true"
        },
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    # Stop here on an error status rather than sending an error body onward
    # as if it were a JWT.
    res.raise_for_status()
    jwt_token = res.text.strip().strip('"')

    res = requests.post(
        "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=REQUEST_TIMEOUT,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")
    res.raise_for_status()

    data = res.json()
    # "content" may be absent or null; normalise both to an empty list.
    invoice_list = data.get("content") or []
    print(f"拿到 {len(invoice_list)} 筆發票")

    # The list must be returned; otherwise main() receives None and fails.
    return invoice_list


def save_invoices(invoices: list):
    """Print a summary line per invoice (database persistence is disabled).

    The database code is kept commented out, so this is currently a dry run
    that only reports what would be saved. Any error while processing is
    printed, together with the invoice being handled, and not re-raised.

    Args:
        invoices: Invoice dicts as returned by ``fetch_invoices``.
    """
    # db = SessionLocal()
    saved = 0
    inv = None  # the invoice being processed, for error reporting
    try:
        for inv in invoices:
            # "or" also covers keys that are present but null.
            inv_date = str(inv.get("invoiceDate") or "未知日期")
            seller = str(inv.get("sellerName") or "未知店家")
            amount = inv.get("totalAmount") or 0
            inv_num = inv.get("invoiceNumber") or "無號碼"

            # existing = db.query(Transaction).filter(
            #     Transaction.note == inv["invoiceNumber"]
            # ).first()
            # if existing:
            #     continue
            # db.add(Transaction(
            #     user_id="auto_import",
            #     category=inv["sellerName"],
            #     amount=inv["totalAmount"],
            #     note=inv["invoiceNumber"],
            #     created_at=datetime.fromisoformat(
            #         inv["invoiceDate"].replace("Z", "+00:00")
            #     )
            # ))

            # Aligned, human-readable output.
            print(f"新增發票 | 日期: {inv_date[:10]} | 店家: {seller[:15]:<15} | 金額: {amount:>6} | 號碼: {inv_num}")
            saved += 1

        # db.commit()
        print("-" * 30)
        print(f"✅ 模擬處理完成:預計新增 {saved} 筆,總計來源 {len(invoices)} 筆")
    except Exception as e:
        print("❌ 儲存發票失敗:", e)
        if inv is not None:
            print(f"錯誤發票內容: {inv}")
        # db.rollback()
    # finally:
    #     db.close()


async def main():
    """Log in, fetch the recent invoices, and print them."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if not token:
        print("登入失敗")
        return

    try:
        invoices = await fetch_invoices(token)
    except (requests.RequestException, ValueError) as e:
        # Report network/HTTP/JSON failures instead of a raw traceback.
        print(f"❌ 抓取發票失敗: {e}")
        return

    save_invoices(invoices)


if __name__ == "__main__":
    asyncio.run(main())