以下是處理微店商品詳情 API 接口返回數據的完整流程指南,包含關鍵字段解析、數據清洗策略和實際應用場景示例:
一、基礎數據解析(Python 示例)
import json
from datetime import datetime
def parse_item_data(api_response):
    """Parse a Weidian item-detail API response into a flat dict.

    Args:
        api_response: Decoded JSON response. The item payload is read from
            its ``'result'`` key.

    Returns:
        A dict with the keys ``item_id``, ``title``, ``price``,
        ``original_price``, ``stock``, ``sales_count``, ``main_images``,
        ``detail_html``, ``category_id``, ``update_time`` and ``sku_data``,
        or ``None`` when the response is invalid or a field cannot be
        parsed (the reason is printed to stdout).
    """
    try:
        # Basic validation: the payload must exist and carry a 'result' entry.
        if not api_response or 'result' not in api_response:
            raise ValueError("無效的API響應數據")
        item_data = api_response['result']

        # Core field extraction. ``or`` fallbacks treat an explicit JSON null
        # the same way as a missing key instead of raising TypeError.
        parsed = {
            'item_id': item_data.get('itemid'),
            'title': item_data.get('item_name'),
            # Price unit conversion (cents -> yuan).
            'price': (item_data.get('price') or 0) / 100,
            'original_price': (item_data.get('original_price') or 0) / 100,
            'stock': item_data.get('quantity'),
            'sales_count': item_data.get('sold'),
            'main_images': [img['url'] for img in item_data.get('imgs') or []],
            'detail_html': item_data.get('detail_html'),
            'category_id': item_data.get('cid'),
            'update_time': datetime.fromtimestamp(
                item_data.get('update_time') or 0
            ),
            'sku_data': process_skus(item_data.get('skus') or []),
        }

        # Placeholder values for empty image list / empty detail HTML.
        parsed['main_images'] = parsed['main_images'] or ['default_product.jpg']
        parsed['detail_html'] = parsed['detail_html'] or '<p>暫無詳情</p>'
        return parsed
    except Exception as e:
        # Deliberate best-effort boundary: report the failure and signal it
        # to the caller with None rather than propagating.
        print(f"數據解析失敗:{e}")
        return None
def process_skus(raw_skus):
    """Flatten the nested SKU records of an item into simple dicts.

    Each returned dict has the keys ``sku_id``, ``specs`` (the SKU's
    ``props`` rendered as space-separated ``name:value`` pairs), ``price``
    (raw price divided by 100), ``stock`` and ``barcode``.
    """
    def _flatten(entry):
        # Render every property as "name:value" before joining.
        spec_parts = []
        for prop in entry.get('props', []):
            spec_parts.append(f"{prop['name']}:{prop['value']}")
        return {
            'sku_id': entry.get('sku_id'),
            'specs': ' '.join(spec_parts),
            'price': entry.get('price', 0) / 100,
            'stock': entry.get('quantity'),
            'barcode': entry.get('barcode'),
        }

    return [_flatten(entry) for entry in raw_skus]
二、數據清洗與增強策略
1. 圖片 URL 處理
def complete_image_urls(images, base_url="https://img.weidian.com/"):
    """Turn image references into absolute URLs.

    Args:
        images: Iterable of URL strings. ``None``, empty and
            whitespace-only entries are skipped, so the result may be
            shorter than the input.
        base_url: Prefix applied to relative paths.

    Returns:
        A list of absolute URLs:

        * ``http://``, ``https://`` and ``data:`` references are kept as is;
        * protocol-relative references (``//host/path``) get an ``https:``
          scheme instead of being glued onto ``base_url``;
        * everything else is treated as a path relative to ``base_url``.
    """
    prefix = base_url.rstrip('/') + '/'
    completed = []
    for url in images or []:
        url = (url or '').strip()
        if not url:
            continue
        if url.startswith('//'):
            # Protocol-relative: the host is already present, only the
            # scheme is missing.
            completed.append('https:' + url)
        elif url.startswith(('http://', 'https://', 'data:')):
            completed.append(url)
        else:
            completed.append(prefix + url.lstrip('/'))
    return completed
2. HTML 詳情清洗
from bs4 import BeautifulSoup
def clean_detail_html(html):
    """Sanitize an item's detail HTML and normalize its images.

    Args:
        html: Raw detail HTML; ``None`` is treated as an empty document.

    Returns:
        The cleaned HTML as a string, with ``<script>``, ``<iframe>`` and
        ``<style>`` elements removed, inline ``on*`` event-handler
        attributes stripped, image ``src`` values made absolute via
        ``complete_image_urls`` and ``loading="lazy"`` set on every image.
    """
    soup = BeautifulSoup(html or '', 'html.parser')

    # Remove unsafe tags together with their content.
    for tag in soup(['script', 'iframe', 'style']):
        tag.decompose()

    # Inline event handlers (onclick, onerror, ...) can run script even
    # after <script> elements are gone.
    for element in soup.find_all(True):
        for attr in [name for name in element.attrs if name.lower().startswith('on')]:
            del element[attr]

    # Improve image display.
    for img in soup.find_all('img'):
        img['loading'] = 'lazy'  # enable lazy loading
        src = (img.get('src') or '').strip()
        if not src:
            # An <img> without a usable src has nothing to complete.
            continue
        img['src'] = complete_image_urls([src])[0]

    # Serialize with str() rather than prettify(): prettify() inserts
    # newlines and indentation between nodes, which adds whitespace to the
    # rendered output instead of compressing it.
    return str(soup)
3. 價格驗證與修正
def validate_price(item):
    """Sanity-check an item's price, repairing it in place where possible.

    Args:
        item: Dict with ``price``, ``original_price`` and ``item_id`` keys.
            ``item['price']`` is overwritten with the original price when
            it is not positive.

    Raises:
        ValueError: If the price exceeds ten times a known (positive)
            original price.
    """
    original_price = item['original_price']
    if item['price'] <= 0:
        item['price'] = original_price
    # Only compare against the original price when one is actually known:
    # with original_price == 0 every positive price would otherwise be
    # reported as abnormal.
    if original_price > 0 and item['price'] > original_price * 10:
        raise ValueError(f"異常價格:商品ID {item['item_id']}")
三、數據存儲方案
1. MySQL 表設計
-- Product master table: one row per item, keyed by the item ID.
-- Column COMMENTs (in Chinese): item ID, item title, selling price,
-- original price, stock, category ID, last update time, detail HTML.
CREATE TABLE products (
id VARCHAR(32) PRIMARY KEY COMMENT '商品ID',
title VARCHAR(200) NOT NULL COMMENT '商品標題',
price DECIMAL(10,2) UNSIGNED NOT NULL COMMENT '售價',
original_price DECIMAL(10,2) UNSIGNED COMMENT '原價',
stock INT UNSIGNED DEFAULT 0 COMMENT '庫存',
category_id INT COMMENT '類目ID',
update_time DATETIME COMMENT '最后更新時間',
detail MEDIUMTEXT COMMENT '詳情HTML',
-- Secondary indexes on category_id and update_time.
INDEX idx_category (category_id),
INDEX idx_update (update_time)
);
-- SKU table: one row per SKU, linked to its parent row in products.
-- Column COMMENTs (in Chinese): SKU ID, item ID, spec combination, barcode.
CREATE TABLE skus (
    id VARCHAR(32) PRIMARY KEY COMMENT 'SKU ID',
    product_id VARCHAR(32) NOT NULL COMMENT '商品ID',
    specs VARCHAR(255) COMMENT '規格組合',
    price DECIMAL(10,2) UNSIGNED NOT NULL,
    stock INT UNSIGNED DEFAULT 0,
    barcode VARCHAR(64) COMMENT '條形碼',
    FOREIGN KEY (product_id) REFERENCES products(id)
);
2. 批量插入優化
import pymysql
from itertools import islice
def _chunked(rows, size):
    """Yield consecutive lists of at most *size* rows taken from *rows*."""
    # islice must consume ONE shared iterator; slicing the list itself
    # would return the first `size` rows on every call and never finish.
    row_iter = iter(rows)
    while True:
        chunk = list(islice(row_iter, size))
        if not chunk:
            return
        yield chunk


def batch_insert(conn, data, batch_size=100):
    """Upsert parsed items and their SKUs into the database in batches.

    Args:
        conn: DB-API connection whose ``cursor()`` is usable as a context
            manager (e.g. a pymysql connection).
        data: Iterable of parsed item dicts providing ``item_id``,
            ``title``, ``price``, ``original_price``, ``stock``,
            ``category_id``, ``update_time``, ``detail_html`` and
            ``sku_data`` (a list of dicts with ``sku_id``, ``specs``,
            ``price``, ``stock``, ``barcode``).
        batch_size: Maximum number of rows per ``executemany`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.

    All product rows are written before any SKU row, and everything is
    committed once at the end. On any error the transaction is rolled
    back and the exception re-raised. Existing product rows have every
    non-key column refreshed; existing SKU rows have price and stock
    refreshed.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # `data` is walked twice (products, then SKUs), so materialize it once.
    items = list(data)

    product_sql = """
        INSERT INTO products
        (id, title, price, original_price, stock, category_id, update_time, detail)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        title=VALUES(title),
        price=VALUES(price),
        original_price=VALUES(original_price),
        stock=VALUES(stock),
        category_id=VALUES(category_id),
        update_time=VALUES(update_time),
        detail=VALUES(detail)
    """
    products = [
        (item['item_id'], item['title'], item['price'],
         item['original_price'], item['stock'], item['category_id'],
         item['update_time'], item['detail_html'])
        for item in items
    ]

    sku_sql = """
        INSERT INTO skus
        (id, product_id, specs, price, stock, barcode)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        price=VALUES(price),
        stock=VALUES(stock)
    """
    skus = [
        (sku['sku_id'], item['item_id'], sku['specs'],
         sku['price'], sku['stock'], sku['barcode'])
        for item in items
        for sku in item['sku_data']
    ]

    try:
        with conn.cursor() as cursor:
            # Product rows first: SKU rows reference them by foreign key.
            for chunk in _chunked(products, batch_size):
                cursor.executemany(product_sql, chunk)
            for chunk in _chunked(skus, batch_size):
                cursor.executemany(sku_sql, chunk)
        conn.commit()
    except Exception:
        conn.rollback()
        raise
四、高級處理場景
1. 價格監控告警
def price_monitor(item_data, threshold=0.2):
    """Send an alert when price deviates from original price beyond a ratio.

    The deviation is ``abs(price - original_price) / original_price``. An
    alert is sent through ``send_alert`` only when it is strictly greater
    than ``threshold``. Items whose original price is 0 are skipped so the
    division cannot fail.
    """
    price_now = item_data['price']
    price_base = item_data['original_price']
    if price_base != 0:
        change_rate = abs(price_now - price_base) / price_base
        exceeded = change_rate > threshold
        if exceeded:
            send_alert(f"商品 {item_data['item_id']} 價格波動達 {change_rate*100:.1f}%")
def send_alert(message, webhook_url=None, timeout=10):
    """Example: post a text notification to a WeCom (企業微信) webhook.

    Args:
        message: Text content of the notification.
        webhook_url: Webhook endpoint. Defaults to the placeholder URL
            below, whose ``key=xxx`` must be replaced with a real key.
        timeout: Seconds to wait for the HTTP request. Without a timeout
            ``requests`` can block indefinitely on an unresponsive server.
    """
    import requests

    if webhook_url is None:
        webhook_url = "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx"
    payload = {
        "msgtype": "text",
        "text": {"content": message},
    }
    requests.post(webhook_url, json=payload, timeout=timeout)
2. 圖片本地化存儲
import os
import requests
from concurrent.futures import ThreadPoolExecutor
def download_images(urls, save_dir='images'):
    """Download images concurrently into a local directory.

    Args:
        urls: Iterable of image URLs.
        save_dir: Target directory; created (with parents) if missing.

    Returns:
        Paths of the files that were saved, in input order. Failed
        downloads are reported on stdout and left out of the result.

    Files are named after the last segment of the URL *path*, so query
    strings never end up in file names. URLs that share that segment
    overwrite each other.
    """
    from urllib.parse import urlparse

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    def download(url):
        try:
            name = os.path.basename(urlparse(url).path)
            if not name:
                raise ValueError("URL 中缺少文件名")
            resp = requests.get(url, timeout=10)
            # Do not store an HTTP error page as if it were an image.
            resp.raise_for_status()
            filename = os.path.join(save_dir, name)
            with open(filename, 'wb') as f:
                f.write(resp.content)
            return filename
        except Exception as e:
            # Best effort: one failed image must not abort the batch.
            print(f"下載失敗 {url}: {str(e)}")
            return None

    with ThreadPoolExecutor(max_workers=5) as executor:
        results = list(executor.map(download, urls))
    return [r for r in results if r]
五、錯誤處理與日志
1. 結構化日志記錄
# 封裝好的微店商品詳情供應商demo url=o0b.cn/ibrad,復制鏈接注冊獲取測試。
import json
import logging
import os
from logging.handlers import TimedRotatingFileHandler


class _JsonFormatter(logging.Formatter):
    """Render each log record as one JSON object per line.

    Serializing with ``json.dumps`` keeps the output valid JSON even when
    the message contains quotes, backslashes or newlines, which a
    hand-written ``'{"msg": "%(message)s"}'`` format string cannot
    guarantee.
    """

    def format(self, record):
        entry = {
            "time": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "msg": record.getMessage(),
        }
        if record.exc_info:
            entry["exc"] = self.formatException(record.exc_info)
        return json.dumps(entry, ensure_ascii=False)


logger = logging.getLogger('weidian_parser')
logger.setLevel(logging.INFO)

# The file handler opens its file on construction and fails if the
# directory does not exist yet.
os.makedirs('logs', exist_ok=True)
handler = TimedRotatingFileHandler(
    'logs/weidian.log',
    when='midnight',  # rotate once per day
    backupCount=7,    # keep seven rotated files
    encoding='utf-8',
)
formatter = _JsonFormatter()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Usage example. parse_item_data() catches its own errors and returns None
# on failure, so the None result has to be checked as well; an except
# branch alone would never log anything.
try:
    parsed_item = parse_item_data(raw_data)
except Exception as e:
    logger.error(f"解析失敗 | 原始數據: {raw_data} | 錯誤: {str(e)}")
else:
    if parsed_item is None:
        logger.error(f"解析失敗 | 原始數據: {raw_data} | 錯誤: parse_item_data 返回 None")
2. 數據質量監控
def data_quality_check(item):
    """Check that an item's key fields are present and plausible.

    Returns ``True`` when the item has a title, a positive price, at least
    one main image and SKU data. Otherwise logs a warning listing every
    failed check and returns ``False``.
    """
    # Evaluate every condition up front, in a fixed order.
    title_missing = not item['title']
    price_invalid = item['price'] <= 0
    images_missing = len(item['main_images']) == 0
    skus_missing = not item['sku_data']

    errors = []
    if title_missing:
        errors.append('缺失商品標題')
    if price_invalid:
        errors.append('價格異常')
    if images_missing:
        errors.append('無主圖')
    if skus_missing:
        errors.append('缺少SKU信息')

    if not errors:
        return True
    logger.warning(f"數據質量問題 商品ID {item['item_id']}: {', '.join(errors)}")
    return False
最佳實踐建議
- 緩存策略:對不常變更的數據(如類目信息)使用 Redis 緩存
- 異步處理:使用 Celery 異步執行耗時的圖片下載和 HTML 清洗
- 版本控制:在數據庫中添加 `api_version` 字段記錄數據來源版本
- 合規性:
- 監控體系:
通過以上處理流程,可確保微店商品數據的高效利用,為價格監控、庫存管理、商品推薦等業務場景提供可靠數據支撐。