mirror of
https://github.com/henry4682/linebot_finance.git
synced 2026-05-16 04:41:52 +00:00
add app files
This commit is contained in:
216
app/invoice_fetcher.py
Normal file
216
app/invoice_fetcher.py
Normal file
@@ -0,0 +1,216 @@
|
||||
import asyncio
|
||||
import base64
|
||||
import os
|
||||
import requests
|
||||
import anthropic
|
||||
import urllib3
|
||||
from datetime import datetime, timedelta
|
||||
from dotenv import load_dotenv
|
||||
from playwright.async_api import async_playwright
|
||||
from sqlalchemy import create_engine, Column, Integer, String, Float, DateTime, text
|
||||
from sqlalchemy.orm import declarative_base, sessionmaker
|
||||
|
||||
# Silence urllib3 warnings process-wide.
# NOTE(review): presumably this is here because the HTTP calls in this file
# pass verify=False — confirm before removing.
urllib3.disable_warnings()

# Load environment variables from a .env file one directory above the
# current working directory.
load_dotenv("../.env")

# Credentials and API key; each is None when the variable is not set.
EINVOICE_USER = os.environ.get("EINVOICE_USER")
EINVOICE_PASS = os.environ.get("EINVOICE_PASS")
ANTHROPIC_API_KEY = os.environ.get("ANTHROPIC_API_KEY")

# Connect directly to the local (localhost) database.
DATABASE_URL = os.environ.get("LOCAL_DATABASE_URL")

# SQLAlchemy plumbing shared by the rest of the module.
engine = create_engine(DATABASE_URL)
SessionLocal = sessionmaker(bind=engine)
Base = declarative_base()
|
||||
|
||||
class Transaction(Base):
    """ORM mapping for one row of the ``transactions`` table."""

    __tablename__ = "transactions"

    # Integer primary key, indexed.
    id = Column(Integer, primary_key=True, index=True)

    # Owner of the record; rows written by save_invoices use "auto_import".
    user_id = Column(String)

    # Free-form category; save_invoices stores the invoice's sellerName here.
    category = Column(String)

    # Transaction amount, stored as a float.
    amount = Column(Float)

    # Optional note; save_invoices stores the invoice number here and uses it
    # to detect rows that were already imported.
    note = Column(String, nullable=True)

    # Timestamp; defaults to datetime.now() evaluated at insert time.
    created_at = Column(DateTime, default=datetime.now)
|
||||
|
||||
def solve_captcha(img_b64: str) -> str:
    """Ask an Anthropic vision model to read a CAPTCHA image.

    Args:
        img_b64: Base64-encoded image data. It is sent to the API declared
            as ``image/png``.

    Returns:
        The text of the first content block in the model's reply, with
        surrounding whitespace stripped.
    """
    client = anthropic.Anthropic(api_key=ANTHROPIC_API_KEY)

    # The image part of the user message.
    image_part = {
        "type": "image",
        "source": {
            "type": "base64",
            "media_type": "image/png",
            "data": img_b64,
        },
    }
    # The instruction part: the image is a 5-digit CAPTCHA, reply with the
    # digits only.
    prompt_part = {
        "type": "text",
        "text": "這是驗證碼圖片,只有5個數字,只回傳這5個數字,不要其他任何文字",
    }

    reply = client.messages.create(
        model="claude-haiku-4-5-20251001",
        max_tokens=10,
        messages=[{"role": "user", "content": [image_part, prompt_part]}],
    )
    first_block = reply.content[0]
    return first_block.text.strip()
|
||||
|
||||
async def login_and_get_token(headless: bool = False) -> str | None:
    """Log in to the e-invoice site and return the session token.

    Steps:
      1. Open the login page in Chromium and read ``login_challenge`` from
         the query string of the resulting URL.
      2. Fetch a CAPTCHA (token + image) and solve it with ``solve_captcha``.
      3. POST the credentials to ``doLogin``.
      4. Navigate the browser to the returned ``redirectTo`` URL and read
         ``token`` from ``sessionStorage``, falling back to ``localStorage``.

    Args:
        headless: Whether to launch Chromium without a visible window.
            Defaults to ``False``.
            NOTE(review): presumably the site behaves differently under
            headless mode — confirm before changing the default.

    Returns:
        The token string, or ``None`` when the login response contains no
        ``redirectTo`` or no token is found in browser storage.
    """
    # Only this function needs parse_qs, so the import stays local.
    from urllib.parse import parse_qs

    # Seconds to wait on each HTTP call so a stalled server cannot hang the run.
    request_timeout = 30

    async with async_playwright() as p:
        # Launch exactly one browser; the try/finally guarantees it is closed
        # on every exit path, including exceptions.
        browser = await p.chromium.launch(headless=headless)
        try:
            page = await browser.new_page()

            # Load the login page to obtain login_challenge.
            await page.goto("https://www.einvoice.nat.gov.tw/accounts/login/mw")
            await page.wait_for_timeout(8000)
            url = page.url
            print(f"目前 URL: {url}")

            # Parse whatever follows the last "?" in the URL as a query string.
            query = url.split("?")[-1] if "?" in url else ""
            login_challenge = parse_qs(query).get("login_challenge", [None])[0]
            print(f"login_challenge: {login_challenge}")

            # Fetch the CAPTCHA.
            # NOTE(review): verify=False disables TLS certificate checks on
            # requests that carry credentials — confirm this is still needed.
            res = requests.get(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/act002i/captcha",
                verify=False,
                timeout=request_timeout,
            )
            captcha_data = res.json()
            captcha_token = captcha_data["token"]
            captcha_text = solve_captcha(captcha_data["image"])
            print(f"驗證碼: {captcha_text}")

            # Log in.
            res = requests.post(
                "https://service-mc.einvoice.nat.gov.tw/act/login/api/client/doLogin",
                json={
                    "loginType": "U",
                    "userType": "MW",
                    "loginChallenge": login_challenge,
                    "captchaToken": captcha_token,
                    "captcha": captcha_text,
                    "customId": EINVOICE_USER,
                    "password": EINVOICE_PASS,
                },
                verify=False,
                timeout=request_timeout,
            )
            data = res.json()
            redirect_url = data.get("redirectTo")
            print(f"redirectTo: {redirect_url}")

            if not redirect_url:
                print(f"登入失敗: {data}")
                return None

            # Follow the redirect so the site can store the token in browser
            # storage; wait generously for its scripts to run.
            await page.goto(redirect_url)
            await page.wait_for_load_state("domcontentloaded")
            await page.wait_for_timeout(8000)

            url = page.url
            print(f"redirect 後 URL: {url}")

            # Dump the keys of both localStorage and sessionStorage.
            local_keys = await page.evaluate("Object.keys(localStorage)")
            session_keys = await page.evaluate("Object.keys(sessionStorage)")
            print("localStorage keys:", local_keys)
            print("sessionStorage keys:", session_keys)
            await page.wait_for_timeout(3000)
            for key in session_keys:
                # Pass the key as an argument instead of splicing it into the
                # JavaScript source, so keys containing quotes cannot break it.
                val = await page.evaluate("key => sessionStorage.getItem(key)", key)
                print(f" session {key}: {val[:80] if val else None}")

            token = await page.evaluate(
                "sessionStorage.getItem('token') || localStorage.getItem('token')"
            )
            print(f"token: {token[:30] if token else 'None'}")
            return token
        finally:
            await browser.close()
|
||||
|
||||
async def fetch_invoices(token: str, days: int = 7) -> list:
    """Fetch carrier invoices for the last ``days`` days.

    Two POST requests are made with the bearer token:
      1. ``getSearchCarrierInvoiceListJWT`` with the date range; the response
         body (stripped of whitespace and surrounding double quotes) is used
         as a search JWT.
      2. ``searchCarrierInvoice`` with that JWT.

    Args:
        token: Session token; sent unmodified as ``Bearer <token>``.
        days: Size of the search window, counted back from now.

    Returns:
        The list of invoice records from the second response, or an empty
        list when none are present.
    """
    base_url = "https://service-mc.einvoice.nat.gov.tw/btc/cloud/api/btc502w"
    # Seconds to wait on each HTTP call so a stalled server cannot hang the run.
    request_timeout = 30

    end_date = datetime.now()
    start_date = end_date - timedelta(days=days)

    # The timestamp format must include milliseconds.
    # NOTE(review): datetime.now() is local naive time but the string is
    # suffixed with "Z" — confirm whether the API expects UTC.
    def to_iso(dt):
        return dt.strftime("%Y-%m-%dT%H:%M:%S.") + f"{dt.microsecond // 1000:03d}Z"

    # The token is used as-is (original note: do not strip the "L").
    headers = {"authorization": f"Bearer {token}"}

    # NOTE(review): verify=False disables TLS certificate checks — confirm
    # this is still needed.
    res = requests.post(
        f"{base_url}/getSearchCarrierInvoiceListJWT",
        headers=headers,
        json={
            "cardCode": "",
            "carrierId2": "",
            "searchStartDate": to_iso(start_date),
            "searchEndDate": to_iso(end_date),
            "invoiceStatus": "all",
            "isSearchAll": "true",
        },
        verify=False,
        timeout=request_timeout,
    )
    print(f"JWT status: {res.status_code}")
    print(f"JWT response: {res.text[:200]}")
    jwt_token = res.text.strip().strip('"')

    res = requests.post(
        f"{base_url}/searchCarrierInvoice",
        headers=headers,
        json={"token": jwt_token},
        verify=False,
        timeout=request_timeout,
    )
    print(f"Invoice status: {res.status_code}")
    print(f"Invoice response: {res.text[:300]}")

    payload = res.json()
    # NOTE(review): the previous code read the records from "invoices" in one
    # place and from "content" in another; both keys are accepted here until
    # the real response schema is confirmed.
    invoices = payload.get("content") or payload.get("invoices") or []
    print(f"拿到 {len(invoices)} 筆發票")
    return invoices
|
||||
|
||||
def save_invoices(invoices: list):
    """Insert invoices that are not yet stored as ``Transaction`` rows.

    An invoice counts as already stored when any existing row has a ``note``
    equal to its ``invoiceNumber``. New rows are added with
    ``user_id="auto_import"``, committed in a single transaction, and a
    summary line is printed.

    Args:
        invoices: Mappings that each provide ``invoiceNumber``,
            ``sellerName``, ``totalAmount`` and ``invoiceDate`` (an ISO-8601
            string; a trailing ``Z`` is rewritten to ``+00:00`` before parsing).
    """
    session = SessionLocal()
    saved = 0
    try:
        for invoice in invoices:
            number = invoice["invoiceNumber"]
            duplicate = (
                session.query(Transaction)
                .filter(Transaction.note == number)
                .first()
            )
            if duplicate:
                continue

            issued_at = datetime.fromisoformat(
                invoice["invoiceDate"].replace("Z", "+00:00")
            )
            row = Transaction(
                user_id="auto_import",
                category=invoice["sellerName"],
                amount=invoice["totalAmount"],
                note=invoice["invoiceNumber"],
                created_at=issued_at,
            )
            session.add(row)
            saved += 1

        session.commit()
        print(f"✅ 新增 {saved} 筆,略過 {len(invoices) - saved} 筆重複")
    finally:
        # Always release the session, whether or not the commit succeeded.
        session.close()
|
||||
|
||||
async def main():
    """Log in, fetch recent invoices, print them, and store the new ones."""
    print("開始抓取發票...")
    token = await login_and_get_token()
    if not token:
        print("登入失敗")
        return

    # Treat a None result from fetch_invoices as "no invoices" so that len()
    # and the loop below cannot raise TypeError.
    invoices = await fetch_invoices(token) or []
    print(f"拿到 {len(invoices)} 筆發票")
    for inv in invoices:
        print(f" {inv['invoiceDate'][:10]} {inv['sellerName']} ${inv['totalAmount']}")

    save_invoices(invoices)


if __name__ == "__main__":
    asyncio.run(main())
|
||||
Reference in New Issue
Block a user