以下是处理微店商品详情 API 接口返回数据的完整流程指南,包含关键字段解析、数据清洗策略和实际应用场景示例:
一、基础数据解析(Python 示例)
import json
from datetime import datetime
def parse_item_data(api_response):
    """Parse a Weidian item-detail API response into a flat product dict.

    Args:
        api_response: Decoded API response. It must contain a ``result``
            dict holding the item fields.

    Returns:
        A dict with the keys ``item_id``, ``title``, ``price``,
        ``original_price``, ``stock``, ``sales_count``, ``main_images``,
        ``detail_html``, ``category_id``, ``update_time`` and ``sku_data``,
        or ``None`` when the response cannot be parsed (the reason is
        printed).
    """
    def to_yuan(amount):
        # Price unit conversion (fen -> yuan). A key that is present but
        # null must behave like a missing key instead of raising TypeError.
        return (amount or 0) / 100

    try:
        # Basic validation
        if not api_response or 'result' not in api_response:
            raise ValueError("无效的API响应数据")
        item_data = api_response['result']
        if not isinstance(item_data, dict):
            raise ValueError("无效的API响应数据")

        # Skip image entries that carry no usable URL rather than failing
        # the whole item on a single malformed entry.
        main_images = []
        for img in item_data.get('imgs') or []:
            url = img.get('url') if isinstance(img, dict) else None
            if url:
                main_images.append(url)

        # Core field extraction
        parsed = {
            'item_id': item_data.get('itemid'),
            'title': item_data.get('item_name'),
            'price': to_yuan(item_data.get('price')),
            'original_price': to_yuan(item_data.get('original_price')),
            'stock': item_data.get('quantity'),
            'sales_count': item_data.get('sold'),
            'main_images': main_images,
            'detail_html': item_data.get('detail_html'),
            'category_id': item_data.get('cid'),
            # A null timestamp falls back to the epoch, like a missing one.
            'update_time': datetime.fromtimestamp(
                item_data.get('update_time') or 0
            ),
            'sku_data': process_skus(item_data.get('skus') or []),
        }

        # Fallbacks for empty values
        parsed['main_images'] = parsed['main_images'] or ['default_product.jpg']
        parsed['detail_html'] = parsed['detail_html'] or '<p>暂无详情</p>'
        return parsed
    except Exception as e:
        # Deliberate best-effort boundary: callers rely on getting None
        # instead of an exception when a response is unusable.
        print(f"数据解析失败:{str(e)}")
        return None
def process_skus(raw_skus):
    """Flatten the nested SKU structures of an item into plain dicts.

    Args:
        raw_skus: Iterable of SKU dicts as returned by the API; ``None`` is
            treated as an empty list.

    Returns:
        A list of dicts with the keys ``sku_id``, ``specs`` (space-separated
        ``name:value`` pairs), ``price`` (raw price divided by 100),
        ``stock`` and ``barcode``.
    """
    skus = []
    for sku in raw_skus or []:
        # Null "props" and props lacking a name/value must not abort the
        # whole item, so fall back to empty values.
        specs = ' '.join(
            f"{prop.get('name', '')}:{prop.get('value', '')}"
            for prop in sku.get('props') or []
        )
        skus.append({
            'sku_id': sku.get('sku_id'),
            'specs': specs,
            # A price that is present but null counts as 0.
            'price': (sku.get('price') or 0) / 100,
            'stock': sku.get('quantity'),
            'barcode': sku.get('barcode'),
        })
    return skus
二、数据清洗与增强策略
1. 图片 URL 处理
def complete_image_urls(images, base_url="https://img.weidian.com/"):
    """Turn relative image paths into absolute URLs.

    Args:
        images: Iterable of image URLs or paths. ``None`` and empty entries
            are dropped.
        base_url: Prefix applied to relative paths.

    Returns:
        A list of absolute URLs in input order. URLs that already carry an
        ``http://``/``https://`` scheme (or are ``data:`` URIs) are returned
        unchanged; protocol-relative ``//host/path`` URLs get ``https:``.
    """
    prefix = base_url.rstrip('/') + '/'
    completed = []
    for url in images or []:
        if not url:
            continue
        if url.startswith('//'):
            # Protocol-relative URL: it already names its own host, so only
            # the scheme is missing.
            completed.append('https:' + url)
        elif url.startswith(('http://', 'https://', 'data:')):
            completed.append(url)
        else:
            completed.append(prefix + url.lstrip('/'))
    return completed
2. HTML 详情清洗
from bs4 import BeautifulSoup
def clean_detail_html(html):
    """Strip unsafe markup from item-detail HTML and normalise its images.

    Removes ``<script>``, ``<iframe>`` and ``<style>`` elements and inline
    ``on*`` event-handler attributes, completes image URLs and marks images
    for lazy loading. This is a light clean-up, not a full HTML sanitiser.

    Args:
        html: Raw detail HTML; ``None`` is treated as an empty document.

    Returns:
        The cleaned HTML as a string. The markup is serialised as-is (no
        pretty-printing), because added indentation and newlines would change
        how inline content renders.
    """
    soup = BeautifulSoup(html or '', 'html.parser')

    # Remove unsafe tags
    for tag in soup(['script', 'iframe', 'style']):
        tag.decompose()

    # Inline event handlers (onclick, onerror, ...) execute script just like
    # a <script> element, so drop them as well.
    for tag in soup.find_all(True):
        for attr in [name for name in tag.attrs if name.lower().startswith('on')]:
            del tag[attr]

    # Normalise image display
    for img in soup.find_all('img'):
        src = img.get('src')
        if src:  # an <img> without src has nothing to complete
            img['src'] = complete_image_urls([src])[0]
        img['loading'] = 'lazy'  # enable lazy loading

    return str(soup)
3. 价格验证与修正
def validate_price(item):
    """Sanity-check an item's price, repairing it in place where possible.

    A non-positive ``price`` is replaced by ``original_price``. A price more
    than ten times the original price is rejected.

    Args:
        item: Parsed item dict with ``price``, ``original_price`` and
            ``item_id`` keys. It is modified in place.

    Raises:
        ValueError: If the price exceeds ten times a known original price.
    """
    original_price = item['original_price']
    if item['price'] <= 0:
        item['price'] = original_price

    # Without a positive original price there is no reference to compare
    # against; comparing with 0 * 10 would reject every priced item.
    has_reference = bool(original_price) and original_price > 0
    if has_reference and item['price'] > original_price * 10:
        raise ValueError(f"异常价格:商品ID {item['item_id']}")
三、数据存储方案
1. MySQL 表设计
-- Product master table: one row per item, keyed by the item ID.
-- Secondary indexes cover lookups by category and by last update time.
-- NOTE(review): no ENGINE / CHARSET is declared, so server defaults apply;
--   confirm utf8mb4 is in effect for Chinese titles and detail HTML.
-- NOTE(review): UNSIGNED on DECIMAL columns is reported as deprecated in
--   recent MySQL 8.0 releases — confirm against the target server version.
CREATE TABLE products (
id VARCHAR(32) PRIMARY KEY COMMENT '商品ID',
title VARCHAR(200) NOT NULL COMMENT '商品标题',
price DECIMAL(10,2) UNSIGNED NOT NULL COMMENT '售价',
original_price DECIMAL(10,2) UNSIGNED COMMENT '原价',
stock INT UNSIGNED DEFAULT 0 COMMENT '库存',
category_id INT COMMENT '类目ID',
update_time DATETIME COMMENT '最后更新时间',
detail MEDIUMTEXT COMMENT '详情HTML',
INDEX idx_category (category_id),
INDEX idx_update (update_time)
);
-- SKU table: one row per SKU, each referencing its parent row in products.
-- The price and stock columns carry no COMMENT, unlike the other columns.
-- NOTE(review): the foreign key declares no ON DELETE / ON UPDATE action, so
--   the server default applies — confirm that deleting a product which still
--   has SKUs should be rejected.
CREATE TABLE skus (
id VARCHAR(32) PRIMARY KEY COMMENT 'SKU ID',
product_id VARCHAR(32) NOT NULL COMMENT '商品ID',
specs VARCHAR(255) COMMENT '规格组合',
price DECIMAL(10,2) UNSIGNED NOT NULL,
stock INT UNSIGNED DEFAULT 0,
barcode VARCHAR(64) COMMENT '条形码',
FOREIGN KEY (product_id) REFERENCES products(id)
);
2. 批量插入优化
import pymysql
from itertools import islice
def _iter_batches(rows, batch_size):
    """Yield consecutive slices of *rows* holding at most *batch_size* rows."""
    for start in range(0, len(rows), batch_size):
        yield rows[start:start + batch_size]


def batch_insert(conn, data, batch_size=100):
    """Upsert parsed items and their SKUs into the database in batches.

    Args:
        conn: Open DB-API connection (the file uses pymysql) providing
            ``cursor()``, ``commit()`` and ``rollback()``.
        data: Iterable of parsed item dicts, each including ``sku_data``.
        batch_size: Maximum number of rows per ``executemany`` call.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1.
        Exception: Any database error is re-raised after the transaction
            has been rolled back.
    """
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # The data is walked twice (products, then SKUs); materialise it so a
    # generator argument is not exhausted after the first pass.
    data = list(data)

    # Product master table upsert
    product_sql = """
        INSERT INTO products
        (id, title, price, original_price, stock, category_id, update_time, detail)
        VALUES (%s, %s, %s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        title=VALUES(title),
        price=VALUES(price),
        stock=VALUES(stock)
    """
    products = [
        (item['item_id'], item['title'], item['price'],
         item['original_price'], item['stock'], item['category_id'],
         item['update_time'], item['detail_html'])
        for item in data
    ]

    # SKU table upsert
    sku_sql = """
        INSERT INTO skus
        (id, product_id, specs, price, stock, barcode)
        VALUES (%s, %s, %s, %s, %s, %s)
        ON DUPLICATE KEY UPDATE
        price=VALUES(price),
        stock=VALUES(stock)
    """
    skus = [
        (sku['sku_id'], item['item_id'], sku['specs'],
         sku['price'], sku['stock'], sku['barcode'])
        for item in data
        for sku in item['sku_data']
    ]

    try:
        with conn.cursor() as cursor:
            # Batches are produced by slicing: calling islice() on the list
            # itself restarts from the first row every time and never ends.
            for chunk in _iter_batches(products, batch_size):
                cursor.executemany(product_sql, chunk)
            for chunk in _iter_batches(skus, batch_size):
                cursor.executemany(sku_sql, chunk)
        conn.commit()
    except Exception:
        # Do not leave a half-written batch pending on the connection.
        conn.rollback()
        raise
四、高级处理场景
1. 价格监控告警
def price_monitor(item_data, threshold=0.2):
    """Send an alert when the price deviates too far from the original price.

    The deviation is the absolute difference between ``price`` and
    ``original_price`` relative to ``original_price``. Nothing happens when
    the original price is 0, which also avoids a division by zero.

    Args:
        item_data: Item dict with ``price``, ``original_price`` and
            ``item_id`` keys.
        threshold: Relative deviation above which an alert is sent.
    """
    price_now = item_data['price']
    price_base = item_data['original_price']
    if price_base != 0:
        change_rate = abs(price_now - price_base) / price_base
        if change_rate > threshold:
            send_alert(f"商品 {item_data['item_id']} 价格波动达 {change_rate*100:.1f}%")
def send_alert(
    message,
    webhook_url="https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=xxx",
    timeout=10,
):
    """Example: post a text notification to a WeCom (企业微信) webhook.

    Args:
        message: Text content of the notification.
        webhook_url: Webhook endpoint. The default carries a placeholder
            key and must be replaced with a real one.
        timeout: Seconds to wait for the webhook before giving up.

    Raises:
        requests.RequestException: If the request fails or times out.
    """
    import requests

    payload = {
        "msgtype": "text",
        "text": {"content": message}
    }
    # Without a timeout a stalled webhook would block the caller forever.
    requests.post(webhook_url, json=payload, timeout=timeout)
2. 图片本地化存储
import os
import requests
from concurrent.futures import ThreadPoolExecutor
def download_images(urls, save_dir='images', max_workers=5):
    """Download images concurrently into a local directory.

    Args:
        urls: Iterable of image URLs.
        save_dir: Target directory; created if it does not exist.
        max_workers: Number of download threads.

    Returns:
        Paths of the files that were written, in input order. URLs that
        failed to download are reported on stdout and left out.
    """
    import hashlib
    from urllib.parse import urlsplit

    # exist_ok avoids the check-then-create race of exists() + makedirs().
    os.makedirs(save_dir, exist_ok=True)

    def target_path(url):
        # Name the file after the URL path only, so query strings do not end
        # up in file names; URLs without a final path segment get a
        # hash-based name instead of an empty one.
        name = os.path.basename(urlsplit(url).path)
        if not name:
            name = hashlib.sha1(url.encode('utf-8')).hexdigest()
        return os.path.join(save_dir, name)

    def download(url):
        try:
            resp = requests.get(url, timeout=10)
            # Do not store an HTTP error page as if it were the image.
            resp.raise_for_status()
            filename = target_path(url)
            with open(filename, 'wb') as f:
                f.write(resp.content)
            return filename
        except Exception as e:
            # Best effort per URL: one failed download must not abort the rest.
            print(f"下载失败 {url}: {str(e)}")
            return None

    with ThreadPoolExecutor(max_workers=max_workers) as executor:
        results = list(executor.map(download, urls))
    return [r for r in results if r]
五、错误处理与日志
1. 结构化日志记录
# 封装好的微店商品详情供应商demo url=o0b.cn/ibrad,复制链接注册获取测试。
import json
import logging
import os
from logging.handlers import TimedRotatingFileHandler


class _JsonFormatter(logging.Formatter):
    """Render each log record as a single JSON object.

    Building the line with ``json.dumps`` keeps it valid JSON even when the
    message contains quotes, backslashes or newlines, which a plain format
    string cannot guarantee.
    """

    def format(self, record):
        entry = {
            "time": self.formatTime(record, self.datefmt),
            "level": record.levelname,
            "msg": record.getMessage(),
        }
        if record.exc_info:
            entry["exc"] = self.formatException(record.exc_info)
        return json.dumps(entry, ensure_ascii=False)


logger = logging.getLogger('weidian_parser')
logger.setLevel(logging.INFO)

# The file handler opens its file on creation and fails if the directory
# is missing, so create it first.
os.makedirs('logs', exist_ok=True)
handler = TimedRotatingFileHandler(
    'logs/weidian.log',
    when='midnight',   # rotate once per day
    backupCount=7,     # keep seven rotated files
    encoding='utf-8'
)
formatter = _JsonFormatter()
handler.setFormatter(formatter)
logger.addHandler(handler)
# Usage example. parse_item_data() handles its own errors and returns None
# instead of raising, so a None result has to be logged explicitly; the
# except branch only guards against anything that still escapes.
try:
    parsed_item = parse_item_data(raw_data)
except Exception as e:
    logger.error(f"解析失败 | 原始数据: {raw_data} | 错误: {str(e)}")
else:
    if parsed_item is None:
        logger.error(f"解析失败 | 原始数据: {raw_data}")
2. 数据质量监控
def data_quality_check(item):
    """Check that the key fields of a parsed item are present and sane.

    Args:
        item: Parsed item dict with ``title``, ``price``, ``main_images``,
            ``sku_data`` and ``item_id`` keys.

    Returns:
        True when every check passes; otherwise False, after logging a
        warning that lists the failed checks.
    """
    # Evaluate every check up front, in a fixed order.
    flags = (
        not item['title'],
        item['price'] <= 0,
        len(item['main_images']) == 0,
        not item['sku_data'],
    )
    messages = ('缺失商品标题', '价格异常', '无主图', '缺少SKU信息')

    errors = [text for failed, text in zip(flags, messages) if failed]
    if not errors:
        return True

    logger.warning(f"数据质量问题 商品ID {item['item_id']}: {', '.join(errors)}")
    return False
最佳实践建议
- 缓存策略:对不常变更的数据(如类目信息)使用 Redis 缓存
- 异步处理:使用 Celery 异步执行耗时的图片下载和 HTML 清洗
- 版本控制:在数据库中添加 `api_version` 字段,记录数据来源版本
- 合规性:遵守微店开放平台的接口调用规范与数据使用条款
- 监控体系:结合上文的结构化日志与数据质量检查,持续跟踪解析失败率和数据异常
通过以上处理流程,可确保微店商品数据的高效利用,为价格监控、库存管理、商品推荐等业务场景提供可靠数据支撑。