import heapq
def uniform_cost_search(initial_state, goal_test, actions, step_cost):
"""
initial_state: 시작 상태
goal_test(s): 상태 s가 목표면 True
actions(s): 상태 s에서 가능한 다음 상태 리스트
step_cost(s, s_next): s -> s_next 이동 비용 (양수 가정)
반환: start → goal 경로(list) 또는 None
"""
nodes = [(initial_state, None, 0.0)]
frontier = [(0.0, 0)]
heapq.heapify(frontier)
frontier_costs = {initial_state: 0.0}
explored = set()
if goal_test(initial_state):
return [initial_state]
while frontier:
cost, node_idx = heapq.heappop(frontier)
state, parent_idx, path_cost = nodes[node_idx]
if state in frontier_costs and cost != frontier_costs[state]:
continue
if goal_test(state):
path = []
i = node_idx
while i is not None:
path.append(nodes[i][0])
i = nodes[i][1]
return list(reversed(path))
explored.add(state)
frontier_costs.pop(state, None)
for nxt in actions(state):
new_cost = path_cost + step_cost(state, nxt)
in_explored = (nxt in explored)
in_frontier = (nxt in frontier_costs)
if not in_explored and not in_frontier:
nodes.append((nxt, node_idx, new_cost))
child_idx = len(nodes) - 1
heapq.heappush(frontier, (new_cost, child_idx))
frontier_costs[nxt] = new_cost
elif in_frontier and new_cost < frontier_costs[nxt]:
nodes.append((nxt, node_idx, new_cost))
child_idx = len(nodes) - 1
heapq.heappush(frontier, (new_cost, child_idx))
frontier_costs[nxt] = new_cost
return None