從新手到高手:掌握這十個字典和集合技巧,代碼效率翻倍
你是否曾經為了查找一個值,寫了一個嵌套循環,最後才發現用字典一行就解決了?你是否曾經用列表去重,結果等了半天,後來才知道集合能瞬間完成?你是否曾經因為鍵不存在而報 KeyError,導致程序崩潰?本文會教你 10 個日常開發中最實用的字典和集合技巧,讓你寫代碼更快更穩。

一、字典的日常技巧(1-6)
1:get() 和 setdefault() —— 安全地獲取和設置值
問題場景:
# 你肯定遇到過這個問題
user_info = {"name": "張三", "age": 25}
# ? 直接訪問會報錯
email = user_info["email"]
# KeyError: 'email'
# ? 用 get() 安全獲取
email = user_info.get("email")
print(email) # None
# ? 提供默認值
email = user_info.get("email", "unknown@example.com")
print(email) # unknown@example.com
get() vs setdefault() 的區別:
settings = {"theme": "dark"}
# get() —— 只讀取,不修改
result = settings.get("language", "zh_CN")
print(result) # zh_CN
print(settings) # {'theme': 'dark'}(不變)
# setdefault() —— 讀取,如果不存在就設置
result = settings.setdefault("language", "zh_CN")
print(result) # zh_CN
print(settings) # {'theme': 'dark', 'language': 'zh_CN'}(被設置了)
# 實戰應用:初始化配置
def get_user_settings(user_id, stored=None):
    """Return the settings for a user, filling in defaults for missing keys.

    Args:
        user_id: Identifier of the user. The function body does not read it;
            it is kept so existing calls keep working.
        stored: Optional mapping of settings already saved for the user.
            It is copied, never modified. Defaults to no saved settings.

    Returns:
        A new dict holding everything from ``stored`` plus a default value
        for each of ``theme``, ``font_size``, ``language`` and
        ``notifications`` that ``stored`` does not define.
    """
    settings = dict(stored) if stored else {}
    # setdefault() only writes a key that is absent, so saved values win
    # over the defaults below.
    settings.setdefault("theme", "light")
    settings.setdefault("font_size", 14)
    settings.setdefault("language", "zh_CN")
    settings.setdefault("notifications", True)
    return settings
config = get_user_settings("user123")
print(config)
# {'theme': 'light', 'font_size': 14, 'language': 'zh_CN', 'notifications': True}
實戰應用:處理 API 響應
# 場景:解析不完整的 JSON 數據
def parse_user_data(api_response):
    """Build a normalised user record from a possibly incomplete API payload.

    Args:
        api_response: Mapping decoded from the API response. Any of the keys
            ``id``, ``username``, ``email``, ``age``, ``city`` and ``bio``
            may be absent.

    Returns:
        A dict with ``user_id``, ``username``, ``email`` and a nested
        ``profile`` dict (``age``, ``city``, ``bio``). A missing ``id``
        becomes ``None``; every other missing field gets its placeholder.
    """
    # .get() never raises on an absent key, so a partial payload cannot
    # trigger KeyError here.
    profile = {
        "age": api_response.get("age", 0),
        "city": api_response.get("city", "unknown"),
        "bio": api_response.get("bio", ""),
    }
    return {
        "user_id": api_response.get("id"),
        "username": api_response.get("username", "anonymous"),
        "email": api_response.get("email", "no-email@example.com"),
        "profile": profile,
    }
# 即使 API 返回不完整,也不會崩潰
incomplete_data = {"id": 123, "username": "user123"}
result = parse_user_data(incomplete_data)
print(result)
# {'user_id': 123, 'username': 'user123', 'email': 'no-email@example.com', ...}
2:defaultdict —— 自動初始化的字典
相比 get() 的優勢:
from collections import defaultdict
# 場景1:計數
# ? 傳統做法(重復代碼)
word_count = {}
words = ["apple", "banana", "apple", "cherry", "banana", "apple"]
for word in words:
if word in word_count:
word_count[word] += 1
else:
word_count[word] = 1
print(word_count) # {'apple': 3, 'banana': 2, 'cherry': 1}
# ? 用 defaultdict(簡潔)
word_count = defaultdict(int)
for word in words:
word_count[word] += 1
print(dict(word_count)) # {'apple': 3, 'banana': 2, 'cherry': 1}
# Scenario 2: grouping
# Traditional approach: create the list by hand the first time a key appears.
students = [
    {"name": "張三", "class": "1班"},
    {"name": "李四", "class": "2班"},
    {"name": "王五", "class": "1班"},
]
class_groups = {}
for student in students:
    cls = student["class"]
    if cls not in class_groups:
        class_groups[cls] = []
    class_groups[cls].append(student["name"])
print(class_groups)
# With defaultdict(list) a missing key starts out as an empty list, so the
# membership check above disappears.
class_groups = defaultdict(list)
for student in students:
    class_groups[student["class"]].append(student["name"])
print(dict(class_groups))
# {'1班': ['張三', '王五'], '2班': ['李四']}
場景3:嵌套字典
from collections import defaultdict
# Count employees per city and per department
employees = [
    {"city": "北京", "dept": "技術", "count": 3},
    {"city": "北京", "dept": "銷售", "count": 2},
    {"city": "上海", "dept": "技術", "count": 4},
    {"city": "上海", "dept": "銷售", "count": 1},
]
# Plain dict: every level has to be created by hand before it can be used.
result = {}
for emp in employees:
    city = emp["city"]
    dept = emp["dept"]
    if city not in result:
        result[city] = {}
    if dept not in result[city]:
        result[city][dept] = 0
    result[city][dept] += emp["count"]
# Nested defaultdict: a missing city yields a fresh defaultdict(int), and a
# missing department inside it starts at 0.
dept_stats = defaultdict(lambda: defaultdict(int))
for emp in employees:
    dept_stats[emp["city"]][emp["dept"]] += emp["count"]
# dict() alone converts only the outer level; convert the inner defaultdicts
# too so that plain dicts are printed.
print({city: dict(depts) for city, depts in dept_stats.items()})
# {'北京': {'技術': 3, '銷售': 2}, '上海': {'技術': 4, '銷售': 1}}
3:字典快速去重 —— 保留順序很重要
為什么要保留順序?
# dicts preserve insertion order since Python 3.7
data = [3, 1, 4, 1, 5, 9, 2, 6, 5]
# Avoid when order matters: a set does not remember insertion order.
unique = list(set(data))
print(unique)  # e.g. [1, 2, 3, 4, 5, 6, 9] - the original order is lost
# Option 1: dict.fromkeys() keeps the first occurrence of each value, in order.
unique = list(dict.fromkeys(data))
print(unique)  # [3, 1, 4, 5, 9, 2, 6]
# Option 2: a "seen" set plus a list comprehension; this preserves order too.
# set.add() returns None, so the parenthesised expression is falsy the first
# time x is met (x is kept and recorded) and True on every repeat.
seen = set()
unique = [x for x in data if not (x in seen or seen.add(x))]
print(unique)  # [3, 1, 4, 5, 9, 2, 6]
# 實戰應用:用戶訪問日志去重
user_visits = [
("user1", "2024-01-01"),
("user2", "2024-01-01"),
("user1", "2024-01-02"),
("user1", "2024-01-01"), # 重復
("user3", "2024-01-01"),
]
# 去重但保留首次訪問的順序
unique_visits = list(dict.fromkeys(user_visits))
print(unique_visits)
# [('user1', '2024-01-01'), ('user2', '2024-01-01'), ('user1', '2024-01-02'), ('user3', '2024-01-01')]
4:items() 迭代 —— 既要鍵也要值
最常見的錯誤:
data = {"a": 1, "b": 2, "c": 3}
# ? 錯誤的做法1(需要額外查詢)
for key in data:
print(key, data[key]) # 每次都要查詢一次
# ? 錯誤的做法2(只能看到鍵)
for key in data.keys():
print(key) # 看不到值
# ? 正確的做法(既要鍵又要值)
for key, value in data.items():
print(key, value)
# ? 如果只要值
for value in data.values():
print(value)
實戰應用:數據轉換
# Scenario 1: changing the values of a dict
prices = {"apple": 3.99, "banana": 2.49, "cherry": 4.99}
# Less direct: iterate over the keys and look every value up again. The loop
# rewrites the dict in place, so it runs on a copy here; otherwise the
# comprehension below would apply the 10% increase a second time.
in_place = dict(prices)
for key in in_place:
    in_place[key] = in_place[key] * 1.1
# Better when a new dict is wanted: items() yields key and value together.
increased_prices = {k: v * 1.1 for k, v in prices.items()}
print(increased_prices)
# approximately {'apple': 4.389, 'banana': 2.739, 'cherry': 5.489}
# (binary floating point may add trailing digits)
# 場景2:過濾字典
data = {"a": 1, "b": 2, "c": 3, "d": 4, "e": 5}
# 保留值大于 2 的項
filtered = {k: v for k, v in data.items() if v > 2}
print(filtered) # {'c': 3, 'd': 4, 'e': 5}
# 場景3:反轉鍵值
mapping = {"user1": "張三", "user2": "李四", "user3": "王五"}
# 反轉成名字到 ID 的映射
reverse_mapping = {v: k for k, v in mapping.items()}
print(reverse_mapping)
# {'張三': 'user1', '李四': 'user2', '王五': 'user3'}
5:pop() 和 popitem() —— 刪除并獲取值
什么時候用 pop():
# pop():刪除指定鍵的值并返回
data = {"name": "張三", "age": 25, "city": "北京"}
# 刪除并獲取
age = data.pop("age")
print(age) # 25
print(data) # {'name': '張三', 'city': '北京'}
# 如果鍵不存在,提供默認值
phone = data.pop("phone", "no-phone")
print(phone) # no-phone
# 實戰應用:處理 API 參數
def create_user(user_data):
    """Split request parameters into the required fields and the rest.

    Args:
        user_data: Mapping of request parameters. It must contain ``name``,
            ``age`` and ``email``; the mapping itself is not modified.

    Returns:
        A dict with ``name``, ``age``, ``email`` and ``extra``, where
        ``extra`` is a new dict holding every other key of ``user_data``.

    Raises:
        KeyError: If ``name``, ``age`` or ``email`` is missing.
    """
    # Pop from a shallow copy so the caller's dict is left untouched.
    remaining = dict(user_data)
    name = remaining.pop("name")
    age = remaining.pop("age")
    email = remaining.pop("email")
    # Whatever was not popped above is passed through as "extra".
    return {"name": name, "age": age, "email": email, "extra": remaining}
request = {"name": "張三", "age": 25, "email": "zhangsan@example.com", "phone": "123456"}
result = create_user(request)
print(result)
# {'name': '張三', 'age': 25, 'email': 'zhangsan@example.com', 'extra': {'phone': '123456'}}
popitem() 的特殊用途:
# popitem() removes and returns the most recently inserted item
# (LIFO order, guaranteed since Python 3.7).
# The entries are inserted oldest first, so the last one is the latest view.
recent_views = {"video3": "2024-01-03", "video2": "2024-01-04", "video1": "2024-01-05"}
# Fetch the most recently viewed video
video, date = recent_views.popitem()
print(f"最近瀏覽:{video} - {date}")
# 實戰應用:緩存管理(限制緩存大小)
from collections import OrderedDict
def add_to_cache(cache, key, value, max_size=3):
    """Store ``key`` as the newest cache entry and evict the oldest overflow.

    Args:
        cache: An ``OrderedDict``, modified in place. A plain dict will not
            work because ``popitem(last=False)`` is used for eviction.
        key: Cache key. An existing entry is replaced and becomes the newest.
        value: Value to store under ``key``.
        max_size: Maximum number of entries kept after the call.

    Returns:
        None. A message is printed for every evicted key.
    """
    # Drop any existing entry first so the key is re-inserted at the end.
    cache.pop(key, None)
    cache[key] = value
    # Evict from the front (oldest) until the cache fits again; the emptiness
    # guard keeps a negative max_size from popping an empty cache.
    while cache and len(cache) > max_size:
        oldest_key, _ = cache.popitem(last=False)
        print(f"刪除過期緩存:{oldest_key}")
cache = OrderedDict()
add_to_cache(cache, "a", 1)
add_to_cache(cache, "b", 2)
add_to_cache(cache, "c", 3)
print(dict(cache)) # {'a': 1, 'b': 2, 'c': 3}
add_to_cache(cache, "d", 4) # 超過大小,會刪除 'a'
print(dict(cache)) # {'b': 2, 'c': 3, 'd': 4}
6:update() 和 | 合并 —— 快速合并字典
兩種合并方式的對比:
dict1 = {"a": 1, "b": 2}
dict2 = {"c": 3, "d": 4}
# ? 低效的做法
for key, value in dict2.items():
dict1[key] = value
# ? 方式1:update()(原地修改)
dict1 = {"a": 1, "b": 2}
dict1.update(dict2)
print(dict1) # {'a': 1, 'b': 2, 'c': 3, 'd': 4}
# ? 方式2:| 操作符(Python 3.9+,創建新字典)
dict1 = {"a": 1, "b": 2}
dict3 = dict1 | dict2
print(dict3) # {'a': 1, 'b': 2, 'c': 3, 'd': 4}
print(dict1) # 原字典不變 {'a': 1, 'b': 2}
# 處理鍵沖突
user1 = {"name": "張三", "age": 25}
user2 = {"age": 26, "city": "北京"}
# 后者覆蓋前者
merged = user1 | user2
print(merged) # {'name': '張三', 'age': 26, 'city': '北京'}
# 實戰應用:合并配置
default_config = {"host": "localhost", "port": 5432, "debug": False}
user_config = {"host": "remote.server", "debug": True}
# 用戶配置覆蓋默認配置
final_config = default_config | user_config
print(final_config)
# {'host': 'remote.server', 'port': 5432, 'debug': True}
二、集合的實用技巧(7-10)
7:集合去重 —— 最快的方式
為什么集合最快?
import time
# 生成有很多重復的數據
data = list(range(10000)) * 10# 10 萬個數字,有重復
print(f"原始長度:{len(data)}") # 100000
# ? 不好的做法1(非常慢,O(n²))
def remove_dup_slow(data):
    """Return the items of ``data`` without duplicates, in first-seen order.

    Kept deliberately naive as the slow baseline of the comparison: the
    membership test scans the result list, so the total work is quadratic.
    """
    result = []
    for item in data:
        if item not in result:  # linear scan of the list on every item
            result.append(item)
    return result
# ? 不好的做法2(雖然快但順序打亂)
def remove_dup_no_order(data):
    """Return the distinct items of ``data`` as a list, in arbitrary order.

    Items must be hashable; the original ordering is not preserved.
    """
    return list(set(data))
# ? 好的做法(快且保留順序)
def remove_dup_fast(data):
    """Return the items of ``data`` without duplicates, in first-seen order.

    A companion set records what has already been emitted, so each
    membership test is a hash lookup instead of a list scan. Items must be
    hashable.
    """
    seen = set()
    result = []
    for item in data:
        if item not in seen:  # average O(1) lookup
            seen.add(item)
            result.append(item)
    return result
# ? 或者用 dict(也能保留順序)
def remove_dup_dict(data):
    """Return the items of ``data`` without duplicates, in first-seen order.

    Relies on dict keys being unique and insertion-ordered (Python 3.7+).
    Items must be hashable.
    """
    return list(dict.fromkeys(data))
# 測試性能
print("\n性能對比:")
start = time.time()
result1 = remove_dup_slow(data[:1000]) # 只用前 1000 個(太慢了)
time1 = time.time() - start
print(f"低效方法:{time1:.4f}s")
start = time.time()
result2 = remove_dup_fast(data)
time2 = time.time() - start
print(f"集合方法:{time2:.4f}s")
start = time.time()
result3 = remove_dup_dict(data)
time3 = time.time() - start
print(f"字典方法:{time3:.4f}s")
# 實戰應用:找出用戶瀏覽過的視頻
user_history = ["video1", "video2", "video1", "video3", "video2", "video4", "video1"]
# 快速得到去重的視頻列表
watched_videos = list(dict.fromkeys(user_history))
print(f"瀏覽過的視頻(保留順序):{watched_videos}")
8:集合成員檢測 —— 快速查詢
為什么不用列表?
# 場景:檢查郵箱是否在黑名單中
blacklist = ["spam@example.com", "fake@test.com", "junk@mail.com"] * 100# 300 個
test_email = "spam@example.com"
# ? 用列表(O(n),需要遍歷)
if test_email in blacklist:
print("郵箱被黑名單攔截")
# ? 用集合(O(1),直接查詢)
blacklist_set = set(blacklist)
if test_email in blacklist_set:
print("郵箱被黑名單攔截")
# 實戰應用:權限檢查
# 獲取允許的操作
allowed_actions = {"read", "write", "delete", "share"}
# 檢查用戶是否有權限
user_action = "read"
if user_action in allowed_actions:
print(f"允許操作:{user_action}")
else:
print(f"拒絕操作:{user_action}")
# 實戰應用:過濾有效數據
valid_statuses = {"active", "inactive", "pending", "suspended"}
received_statuses = ["active", "inactive", "invalid", "pending", "unknown"]
# 快速過濾出有效狀態
valid = [s for s in received_statuses if s in valid_statuses]
print(f"有效狀態:{valid}") # ['active', 'inactive', 'pending']
9:集合運算 —— 并集、交集、差集
日常最常用的三種操作:
# 創建兩個集合
team_a = {"張三", "李四", "王五", "趙六"}
team_b = {"王五", "趙六", "孫七", "周八"}
# 1. 交集(兩個集合的公共部分)—— 最常用
common = team_a & team_b
print(f"兩個團隊都有的人:{common}") # {'王五', '趙六'}
# 2. 差集(在 A 但不在 B)
only_in_a = team_a - team_b
print(f"只在 A 團隊的人:{only_in_a}") # {'張三', '李四'}
# 3. 并集(兩個集合的所有元素)
all_members = team_a | team_b
print(f"所有成員:{all_members}") # {'張三', '李四', '王五', '趙六', '孫七', '周八'}
# 實戰應用1:找出共同興趣
user1_interests = {"足球", "籃球", "游戲", "編程", "讀書"}
user2_interests = {"足球", "排球", "電影", "編程", "旅游"}
common_interests = user1_interests & user2_interests
print(f"共同興趣:{common_interests}") # {'足球', '編程'}
# 實戰應用2:權限檢查
admin_permissions = {"read", "write", "delete", "manage_users"}
user_permissions = {"read", "write"}
# 用戶缺少的權限
missing_permissions = admin_permissions - user_permissions
print(f"用戶缺少的權限:{missing_permissions}") # {'delete', 'manage_users'}
# 實戰應用3:數據驗證
allowed_countries = {"中國", "美國", "日本", "韓國", "英國"}
submitted_countries = ["中國", "美國", "加拿大", "日本"]
# 找出無效的國家
invalid = set(submitted_countries) - allowed_countries
print(f"無效國家:{invalid}") # {'加拿大'}
valid = set(submitted_countries) & allowed_countries
print(f"有效國家:{valid}") # {'中國', '美國', '日本'}
10:frozenset —— 不可變集合
什么時候用 frozenset?
# 場景1:集合作為字典的鍵或集合的元素
# ? 普通集合不行(可變的)
try:
d = {{1, 2, 3}: "value"}
except TypeError:
print("錯誤:集合不能作為字典鍵")
# ? frozenset 可以(不可變的)
d = {frozenset([1, 2, 3]): "value"}
print(d) # {frozenset({1, 2, 3}): 'value'}
# 場景2:集合的集合
# ? 不行
try:
s = {{1, 2}, {3, 4}}
except TypeError:
print("錯誤:不能創建集合的集合")
# ? frozenset 可以
s = {frozenset([1, 2]), frozenset([3, 4])}
print(s) # {frozenset({1, 2}), frozenset({3, 4})}
# 場景3:實際應用 —— 權限組
# 定義幾種權限組合
permission_groups = {
frozenset(["read"]): "訪客",
frozenset(["read", "write"]): "編輯",
frozenset(["read", "write", "delete"]): "管理員",
}
# 根據權限找出用戶角色
user_perms = frozenset(["read", "write", "delete"])
role = permission_groups.get(user_perms, "未知角色")
print(f"用戶角色:{role}") # 管理員
# 場景4:緩存集合操作的結果
# frozenset 可以被哈希,可以用作緩存的鍵
from functools import lru_cache
@lru_cache(maxsize=128)
def get_common_elements(set1_frozen, set2_frozen):
    """Return the intersection of two frozensets, memoised by ``lru_cache``.

    Both arguments must be hashable (hence frozenset rather than set)
    because ``lru_cache`` uses the arguments as the cache key.
    """
    return set1_frozen & set2_frozen
# 使用 frozenset 調用
result = get_common_elements(
frozenset([1, 2, 3, 4]),
frozenset([3, 4, 5, 6])
)
print(result) # frozenset({3, 4})
三、實戰案例
案例1:用戶標簽系統
from collections import defaultdict
class TagManager:
    """Two-way index between users and tags.

    ``user_tags`` maps a user id to the set of its tags and ``tag_users``
    maps a tag to the set of user ids carrying it. Only ``add_tag`` writes
    to the indexes; the query methods use ``.get()`` so that asking about an
    unknown user or tag does not insert an empty entry into a defaultdict.
    """

    def __init__(self):
        # user_id -> set of tags
        self.user_tags = defaultdict(set)
        # tag -> set of user_ids
        self.tag_users = defaultdict(set)

    def add_tag(self, user_id, tag):
        """Attach ``tag`` to ``user_id``, updating both indexes."""
        self.user_tags[user_id].add(tag)
        self.tag_users[tag].add(user_id)

    def get_user_tags(self, user_id):
        """Return the set of tags of ``user_id`` (empty set if unknown)."""
        return self.user_tags.get(user_id, set())

    def find_similar_users(self, user_id):
        """Return the users sharing at least one tag with ``user_id``.

        The user itself is excluded. An unknown user, or one without tags,
        yields an empty set.
        """
        own_tags = self.user_tags.get(user_id)
        if not own_tags:
            return set()
        similar = set()
        for tag in own_tags:
            similar.update(self.tag_users.get(tag, ()))
        similar.discard(user_id)  # a user is not similar to itself
        return similar

    def find_users_by_tags(self, tags):
        """Return the users that carry every tag in ``tags``.

        An empty ``tags`` argument, or any tag nobody carries, yields an
        empty set. The returned set is always a new object.
        """
        if not tags:
            return set()
        candidate_sets = [self.tag_users.get(tag, set()) for tag in set(tags)]
        if not candidate_sets:
            # ``tags`` was truthy but produced no items (e.g. an exhausted
            # iterator).
            return set()
        return set.intersection(*candidate_sets)
# 使用
manager = TagManager()
manager.add_tag("user1", "python")
manager.add_tag("user1", "django")
manager.add_tag("user2", "python")
manager.add_tag("user2", "flask")
manager.add_tag("user3", "django")
print(f"user1 的標簽:{manager.get_user_tags('user1')}")
print(f"與 user1 相似的用戶:{manager.find_similar_users('user1')}")
print(f"有 python 標簽的用戶:{manager.tag_users['python']}")
案例2:API 參數驗證
def validate_api_request(request_data, required_fields, optional_fields):
    """Check that a request has all required fields and no unknown ones.

    Args:
        request_data: Mapping of the received request parameters.
        required_fields: Iterable of field names that must be present.
        optional_fields: Iterable of field names that may be present.

    Returns:
        A ``(ok, message)`` tuple. ``ok`` is ``False`` when a required field
        is missing or a field outside required/optional was sent; the
        message then names the offending fields. Missing required fields
        are reported before unknown fields.
    """
    received_fields = set(request_data)
    required_set = set(required_fields)
    allowed_fields = required_set | set(optional_fields)

    # Required fields that were not sent.
    missing_fields = required_set - received_fields
    if missing_fields:
        return False, f"缺少必需字段:{missing_fields}"

    # Sent fields that are neither required nor optional.
    invalid_fields = received_fields - allowed_fields
    if invalid_fields:
        return False, f"包含非法字段:{invalid_fields}"

    return True, "驗證成功"
# 使用
required = ["name", "age", "email"]
optional = ["phone", "city", "bio"]
# 測試 1:缺少必需字段
request1 = {"name": "張三", "age": 25}
valid, msg = validate_api_request(request1, required, optional)
print(f"請求1:{msg}") # 缺少必需字段:{'email'}
# 測試 2:包含非法字段
request2 = {"name": "張三", "age": 25, "email": "test@example.com", "password": "secret"}
valid, msg = validate_api_request(request2, required, optional)
print(f"請求2:{msg}") # 包含非法字段:{'password'}
# 測試 3:驗證通過
request3 = {"name": "張三", "age": 25, "email": "test@example.com", "phone": "123456"}
valid, msg = validate_api_request(request3, required, optional)
print(f"請求3:{msg}") # 驗證成功
四、總結
1. 什么時候用字典,什么時候用集合?
用字典的情況:
- 需要鍵值對應關系(如用戶名到用戶信息)
- 需要通過鍵快速查找值
- 需要存儲復雜的結構化數據
用集合的情況:
- 需要存儲獨一無二的值
- 需要快速的成員檢測
- 需要進行數學運算(并集、交集、差集)
- 需要去重
2. 常見錯誤總結
# ? 錯誤1:直接訪問字典鍵
data = {"a": 1}
value = data["b"] # KeyError
# ? 改正
value = data.get("b", 0)
# Mistake 2: de-duplicating with a list
data = [1, 2, 2, 3, 3, 3]
unique = []
for item in data:
    if item not in unique:  # O(n^2) overall: every test scans the list
        unique.append(item)
# Fix
unique = list(dict.fromkeys(data))  # O(n), keeps first-seen order
# ? 錯誤3:在集合中存儲可變對象
s = {1, 2, [3, 4]} # TypeError: unhashable type: 'list'
# ? 改正
s = {1, 2, (3, 4)} # 用元組
# ? 錯誤4:修改字典時迭代
d = {"a": 1, "b": 2, "c": 3}
for key in d:
del d[key] # RuntimeError: dictionary changed size
# ? 改正
d = {"a": 1, "b": 2, "c": 3}
for key in list(d.keys()):
del d[key]
五、一分鐘快速參考
字典最常用的五個操作:
d = {}
# 1. 安全獲取值
value = d.get("key", default_value)
# 2. 遍歷鍵值
for k, v in d.items():
pass
# 3. 刪除并獲取
value = d.pop("key", default)
# 4. 快速合并
d1 | d2 # Python 3.9+
# 5. 自動初始化
from collections import defaultdict
dd = defaultdict(list)
dd["key"].append(1)
集合最常用的五個操作:
s = set()
# 1. 快速去重
unique = list(dict.fromkeys(data))
# 2. 成員檢測
if item in s:
pass
# 3. 交集
common = s1 & s2
# 4. 差集
difference = s1 - s2
# 5. 添加和刪除
s.add(item)
s.remove(item)
六、練手題目
題目 1:詞頻統計
# 統計文本中每個單詞的出現頻率
text = "python python java python ruby python java"
# 你的答案:
word_count = defaultdict(int)
for word in text.split():
word_count[word] += 1
# 或者更簡潔
word_count = {}
for word in text.split():
word_count[word] = word_count.get(word, 0) + 1
print(word_count)
# {'python': 4, 'java': 2, 'ruby': 1}
題目 2:兩數之和
# 在列表中找出兩個數,使其和等于目標值
def find_two_sum(numbers, target):
    """Find two values in ``numbers`` whose sum equals ``target``.

    Args:
        numbers: Iterable of hashable numbers, scanned once from the start.
        target: The required sum.

    Returns:
        A tuple ``(earlier, later)`` for the first pair completed during the
        scan, or ``None`` when no two elements add up to ``target``.
    """
    seen = set()
    for num in numbers:
        complement = target - num
        # The partner must have appeared earlier, so one element is never
        # paired with itself.
        if complement in seen:
            return (complement, num)
        seen.add(num)
    return None
# 測試
print(find_two_sum([2, 7, 11, 15], 9)) # (2, 7)
print(find_two_sum([1, 2, 3, 4], 10)) # None
題目 3:分組
# 按年齡將學生分組
students = [
{"name": "張三", "age": 20},
{"name": "李四", "age": 21},
{"name": "王五", "age": 20},
{"name": "趙六", "age": 21},
]
# 你的答案:
from collections import defaultdict
age_groups = defaultdict(list)
for student in students:
age_groups[student["age"]].append(student["name"])
print(dict(age_groups))
# {20: ['張三', '王五'], 21: ['李四', '趙六']}
七、建議
立即動手:
- 改造你的代碼:找一個用列表檢查成員的地方,改成用集合
- 使用 defaultdict:在計數問題中用 defaultdict 替代傳統方式
- 安全訪問:在鍵可能不存在的地方,把 d["key"] 改成 d.get("key", default);必須存在的鍵仍然用 d["key"],讓錯誤及早暴露
繼續深入:
- 學習 OrderedDict 和 Counter(都在 collections 模塊中)
- 理解字典和集合的底層實現(哈希表)
- 掌握更多集合運算(issubset、isdisjoint 等)
熟練掌握這些技巧后,你會發現自己寫的代碼不僅更快,而且 Bug 更少。這就是 Python的優雅之處——簡單的工具,強大的能力。


































