{"id":8793,"date":"2025-10-09T16:19:33","date_gmt":"2025-10-09T16:19:33","guid":{"rendered":"https:\/\/pokecon.jp\/job\/?p=8793"},"modified":"2025-10-09T16:19:33","modified_gmt":"2025-10-09T16:19:33","slug":"llmx%e5%bc%b7%e5%8c%96%e5%ad%a6%e7%bf%92%e3%81%ae%e6%96%b0%e3%81%97%e3%81%84%e3%83%91%e3%83%a9%e3%83%80%e3%82%a4%e3%83%a0-agentic-rl%e3%81%ae%e7%a0%94%e7%a9%b6%e7%b4%b9%e4%bb%8b","status":"publish","type":"post","link":"https:\/\/pokecon.jp\/job\/8793\/","title":{"rendered":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb"},"content":{"rendered":"\n<\/p>\n<div>\n<h2 id=\"%E3%81%AF%E3%81%98%E3%82%81%E3%81%AB\" data-line=\"0\" class=\"code-line\">\n \u306f\u3058\u3081\u306b<\/h2>\n<p data-line=\"1\" class=\"code-line\">\u672c\u8a18\u4e8b\u3067\u306f\u3001LLM\u7814\u7a76\u3067\u6ce8\u76ee\u3092\u96c6\u3081\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u578b\u5f37\u5316\u5b66\u7fd2\uff08Agentic Reinforcement Learning\u3001Agentic RL\uff09\u306e\u30b5\u30fc\u30d9\u30a4\u8ad6\u6587<br \/>\u300cThe Landscape of Agentic Reinforcement Learning for LLMs: A Survey\u300d\u3092\u8aad\u307f\u3001\u79c1\u306a\u308a\u306e\u7406\u89e3\u3068\u8981\u70b9\u3092\u6574\u7406\u3057\u3066\u7d39\u4ecb\u3057\u307e\u3059\u3002500\u4ef6\u4ee5\u4e0a\u306e\u6587\u732e\u3092\u5f15\u7528\u3059\u308b\u30dc\u30ea\u30e5\u30fc\u30e0\u306e\u3042\u308b\u8ad6\u6587\u3067\u3059\u304c\u3001\u3053\u3053\u3067\u306f\u91cd\u8981\u3060\u3068\u611f\u3058\u305f\u30c8\u30d4\u30c3\u30af\u306b\u7d5e\u3063\u3066\u53d6\u308a\u4e0a\u3052\u307e\u3059\u3002Agentic RL\u306b\u8208\u5473\u304c\u3042\u308b\u65b9\u3084\u3001LLM\u306b\u5bfe\u3059\u308b\u5f37\u5316\u5b66\u7fd2\u306e\u6700\u65b0\u52d5\u5411\u3092\u77e5\u308a\u305f\u3044\u65b9\u306e\u53c2\u8003\u306b\u306a\u308c\u3070\u5e78\u3044\u3067\u3059\u3002<\/p>\n<h3 id=\"%E6%9C%AC%E8%A8%98%E4%BA%8B%E3%81%AE%E5%89%8D%E6%8F%90\" data-line=\"4\" class=\"code-line\">\n 
\u672c\u8a18\u4e8b\u306e\u524d\u63d0<\/h3>\n<ul data-line=\"5\" class=\"code-line\">\n<li data-line=\"5\" class=\"code-line\">PPO\u3084GRPO\u3068\u3044\u3063\u305fRL\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u306e\u89e3\u8aac\u306f\u4ed6\u306e\u591a\u304f\u306e\u8a18\u4e8b\u3067\u65e2\u306b\u8aac\u660e\u3055\u308c\u3066\u3044\u308b\u305f\u3081\u3001\u672c\u8a18\u4e8b\u3067\u306f\u5272\u611b\u3057\u307e\u3059\u3002<\/li>\n<li data-line=\"6\" class=\"code-line\">DeepSeek-R1\u306e\u7814\u7a76\u3092\u524d\u63d0\u3068\u3059\u308b\u7b87\u6240\u304c\u3044\u304f\u3064\u304b\u3042\u308a\u307e\u3059\u3002\u672a\u8aad\u306e\u65b9\u306f\u539f\u8457\u8ad6\u6587\u3084\u89e3\u8aac\u8a18\u4e8b\u306e\u53c2\u7167\u3092\u304a\u3059\u3059\u3081\u3057\u307e\u3059\u3002\u79c1\u306f\u4ee5\u4e0b\u306e\u30d6\u30ed\u30b0\u306b\u5927\u5909\u304a\u4e16\u8a71\u306b\u306a\u308a\u307e\u3057\u305f\u3002<\/li>\n<\/ul>\n<p data-line=\"8\" class=\"code-line\"><span class=\"embed-block zenn-embedded zenn-embedded-card\"><iframe id=\"zenn-embedded__4a4051bfcf1f9\" src=\"https:\/\/embed.zenn.studio\/card#zenn-embedded__4a4051bfcf1f9\" data-content=\"https%3A%2F%2Fzenn.dev%2Fasap%2Farticles%2F34237ad87f8511\" frameborder=\"0\" scrolling=\"no\" loading=\"lazy\"><\/iframe><\/span><a target=\"_blank\" href=\"https:\/\/zenn.dev\/asap\/articles\/34237ad87f8511\" style=\"display:none\">https:\/\/zenn.dev\/asap\/articles\/34237ad87f8511<\/a><\/p>\n<p data-line=\"10\" class=\"code-line\"><span class=\"embed-block zenn-embedded zenn-embedded-card\"><iframe id=\"zenn-embedded__3c874f5ca8148\" src=\"https:\/\/embed.zenn.studio\/card#zenn-embedded__3c874f5ca8148\" data-content=\"https%3A%2F%2Fhoromary.hatenablog.com%2Fentry%2F2025%2F01%2F26%2F204545\" frameborder=\"0\" scrolling=\"no\" loading=\"lazy\"><\/iframe><\/span><a target=\"_blank\" href=\"https:\/\/horomary.hatenablog.com\/entry\/2025\/01\/26\/204545\" style=\"display:none\" rel=\"nofollow noopener 
noreferrer\">https:\/\/horomary.hatenablog.com\/entry\/2025\/01\/26\/204545<\/a><\/p>\n<h2 id=\"3%E8%A1%8C%E3%81%A7%E3%81%BE%E3%81%A8%E3%82%81\" data-line=\"12\" class=\"code-line\">\n 3\u884c\u3067\u307e\u3068\u3081<\/h2>\n<ul data-line=\"13\" class=\"code-line\">\n<li data-line=\"13\" class=\"code-line\">Agentic RL\u3068\u306f\u3001LLM\u3092\u5b66\u7fd2\u53ef\u80fd\u306a\u65b9\u7b56\u3068\u3057\u3066\u6271\u3044\u3001\u74b0\u5883\u3068\u5bfe\u8a71\u3057\u306a\u304c\u3089\u9577\u671f\u7684\u306a\u76ee\u6a19\u3092\u9054\u6210\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3057\u3066\u306e\u80fd\u529b\u3092\u5f37\u5316\u5b66\u7fd2\u3067\u5411\u4e0a\u3055\u305b\u308b\u67a0\u7d44\u307f\u3067\u3042\u308b<\/li>\n<li data-line=\"14\" class=\"code-line\">\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u6027\u80fd\u5411\u4e0a\u306e\u624b\u6bb5\u3068\u3057\u3066\u30d7\u30ed\u30f3\u30d7\u30c8\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3084\u6559\u5e2b\u3042\u308a\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0(SFT)\u306b\u52a0\u3048\u3001RL\u304c\u91cd\u8981\u306a\u5f79\u5272\u3092\u679c\u305f\u3057\u3066\u3044\u308b<\/li>\n<li data-line=\"15\" class=\"code-line\">\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u30b3\u30a2\u80fd\u529b\u3067\u3042\u308b6\u3064\u306e\u80fd\u529b\uff08\u63a8\u8ad6\u3001\u30c4\u30fc\u30eb\u4f7f\u7528\u3001\u8a18\u61b6\u3001\u8a08\u753b\u3001\u81ea\u5df1\u6539\u5584\u3001\u77e5\u899a\uff09\u3092RL\u3067\u6539\u5584\u3057\u3066\u3044\u308b<\/li>\n<\/ul>\n<h2 id=\"llm%C3%97%E5%BC%B7%E5%8C%96%E5%AD%A6%E7%BF%92%E3%81%AE%E5%8B%95%E5%90%91\" data-line=\"17\" class=\"code-line\">\n LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u52d5\u5411<\/h2>\n<p data-line=\"18\" class=\"code-line\">Agentic RL\u306e\u8a71\u306b\u5165\u308b\u524d\u306b\u3001\u307e\u305a\u306fLLM\u306b\u5bfe\u3057\u3066RL\u304c\u3069\u306e\u3088\u3046\u306b\u9069\u7528\u3055\u308c\u3066\u304d\u305f\u304b\u3092\u7c21\u5358\u306b\u632f\u308a\u8fd4\u308a\u307e\u3059\u3002<\/p>\n<h3 
id=\"%E9%81%B8%E5%A5%BD%E3%83%81%E3%83%A5%E3%83%BC%E3%83%8B%E3%83%B3%E3%82%B0\" data-line=\"20\" class=\"code-line\">\n \u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0<\/h3>\n<p data-line=\"21\" class=\"code-line\">ChatGPT\uff082022\u5e7411\u6708\uff09\u4ee5\u964d\u3001LLM\u5bfe\u8a71\u30b7\u30b9\u30c6\u30e0\u304c\u6025\u901f\u306b\u666e\u53ca\u3057\u307e\u3057\u305f\u3002LLM\u306fWeb\u5927\u898f\u6a21\u30b3\u30fc\u30d1\u30b9\u3067\u4e8b\u524d\u5b66\u7fd2\u3057\u3001\u4eba\u9593\u306e\u6307\u793a\u306b\u5f93\u3063\u3066\u5fdc\u7b54\u3059\u308b\u632f\u308b\u821e\u3044\u3092\u7372\u5f97\u3059\u308b\u305f\u3081\u306b\u6559\u5e2b\u3042\u308a\u5b66\u7fd2\u306b\u3088\u308b\u6307\u793a\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u884c\u3044\u307e\u3059\u3002\u305f\u3060\u3057\u3053\u308c\u3060\u3051\u3067\u306f\u4eba\u9593\u306e\u597d\u307f\u304b\u3089\u5916\u308c\u305f\u308a\u502b\u7406\u7684\u306b\u4e0d\u9069\u5207\u306a\u5fdc\u7b54\u304c\u751f\u3058\u308b\u3053\u3068\u304c\u3042\u308b\u305f\u3081RL\u3092\u7528\u3044\u3066\u5fdc\u7b54\u3092\u4eba\u9593\u306e\u597d\u307f\u306b\u8fd1\u3065\u3051\u308b\u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u304c\u884c\u308f\u308c\u3066\u304d\u307e\u3057\u305f\u3002\u4ee3\u8868\u4f8b\u304c\u4eba\u9593\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u306b\u3088\u308b\u5f37\u5316\u5b66\u7fd2(RLHF)\u3067\u3001\u4eba\u9593\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3067\u5b66\u7fd2\u3057\u305f\u5831\u916c\u30e2\u30c7\u30eb\u3092\u7528\u3044\u3066\u5fdc\u7b54\u306b\u5831\u916c\u3092\u4e0e\u3048\u6700\u9069\u5316\u3057\u307e\u3059\u3002\u305d\u306e\u4ed6AI\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u3092\u7528\u3044\u308bRLAIF\u3084\u3001\u5831\u916c\u30e2\u30c7\u30eb\/RL\u3092\u4f7f\u308f\u305a\u9078\u597d\u3092\u76f4\u63a5\u5b66\u3076DPO\u3082\u3042\u308a\u307e\u3059\u3002\u672c\u8a18\u4e8b\u3067\u306f\u3001\u3053\u3046\u3057\u305f\u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u624b\u6cd5\u3092\u7dcf\u79f0\u3057\u3066Preference-Based 
Reinforcement Fine-Tuning\uff08PBRFT\uff09\u3068\u547c\u3073\u3001\u672c\u8ad6\u6587\u3067\u306f\u5f93\u6765\u306eRL\u3068\u4f4d\u7f6e\u4ed8\u3051\u307e\u3059\u3002<\/p>\n<h3 id=\"%E6%8E%A8%E8%AB%96%E8%83%BD%E5%8A%9B%E3%81%AE%E5%90%91%E4%B8%8A\" data-line=\"23\" class=\"code-line\">\n \u63a8\u8ad6\u80fd\u529b\u306e\u5411\u4e0a<\/h3>\n<p data-line=\"24\" class=\"code-line\">\u521d\u671f\u306eLLM\u3078\u306eRL\u9069\u7528\u306f\u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u304c\u4e3b\u3067\u3057\u305f\u304c\u30012024\u5e749\u6708\u306bOpenAI\u304b\u3089\u521d\u306e\u63a8\u8ad6\u30e2\u30c7\u30eb\u3067\u3042\u308bOpenAI o1\u304c\u767a\u8868\u3055\u308c\u307e\u3057\u305f\u3002RL\u306b\u3088\u308a\u9577\u8003\u3057\u3066\u7b54\u3048\u3092\u5c0e\u304f\u80fd\u529b\u3092\u9ad8\u3081\u305f\u3053\u3068\u304c\u3001\u30b7\u30b9\u30c6\u30e0\u30ab\u30fc\u30c9\u3067\u5831\u544a\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u5177\u4f53\u7684\u306a\u624b\u6cd5\u306f\u516c\u958b\u3055\u308c\u3066\u3044\u307e\u305b\u3093\u3067\u3057\u305f\u304c\u30012025\u5e741\u6708\u306b\u767b\u5834\u3057\u305fDeepSeek-R1\u306f\u3001\u4fa1\u5024\u8a55\u4fa1\u30e2\u30c7\u30eb\u3092\u4e0d\u8981\u3068\u3059\u308bGRPO\u3068\u3044\u3046RL\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3084\u3001\u7b54\u3048\u304c\u4e00\u610f\u306b\u5b9a\u307e\u308b\u554f\u984c\u306b\u5bfe\u3057\u3066\u691c\u8a3c\u53ef\u80fd\u306a\u30eb\u30fc\u30eb\u30d9\u30fc\u30b9\u5831\u916c\u3092\u7528\u3044\u3001\u5831\u916c\u30e2\u30c7\u30eb\u3092\u53d6\u308a\u9664\u3044\u3066\u5b66\u7fd2\u30b3\u30b9\u30c8\u3092\u4e0b\u3052\u308b\u306a\u3069\u306e\u5177\u4f53\u7b56\u3068\u3068\u3082\u306b\u3001RL\u304cLLM\u306e\u63a8\u8ad6\u30fb\u6c4e\u5316\u80fd\u529b\u3092\u98db\u8e8d\u7684\u306b\u5411\u4e0a\u3055\u305b\u308b\u3053\u3068\u3092\u793a\u3057\u307e\u3057\u305f\u3002\u3053\u308c\u3092\u6a5f\u306b\u3001\u5f93\u6765\u306e\u300c\u30a2\u30e9\u30a4\u30e1\u30f3\u30c8\u76ee\u7684\u300d\u304b\u3089\u300c\u80fd\u529b\u5411\u4e0a\u76ee\u7684\u300d\u3078\u30
68RL\u306e\u9069\u7528\u304c\u5e83\u304c\u308a\u3001\u3053\u306e\u6d41\u308c\u304c\u672c\u8a18\u4e8b\u306e\u4e3b\u984c\u3067\u3042\u308bAgentic RL\u3078\u3068\u3064\u306a\u304c\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<h3 id=\"%E3%83%84%E3%83%BC%E3%83%AB%E5%88%A9%E7%94%A8%E6%80%A7%E8%83%BD%E3%81%AE%E5%90%91%E4%B8%8A\" data-line=\"26\" class=\"code-line\">\n \u30c4\u30fc\u30eb\u5229\u7528\u6027\u80fd\u306e\u5411\u4e0a<\/h3>\n<p data-line=\"27\" class=\"code-line\">2025\u5e742\u6708\u306b\u767a\u8868\u3055\u308c\u305fChatGPT\u306eDeep Research\uff08Web\u691c\u7d22\u3092\u7528\u3044\u3066\u30ec\u30dd\u30fc\u30c8\u3092\u4f5c\u6210\u3059\u308b\u6a5f\u80fd\uff09\u306b\u3082\u3001RL\u304c\u9069\u7528\u3055\u308c\u3066\u3044\u308b\u3068\u5831\u544a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<br \/>\u307e\u305fOpenAI o1\u306e\u5f8c\u7d99\u3067\u3042\u308bo3\u30e2\u30c7\u30eb\u3067\u306f\u3001\u63a8\u8ad6\u80fd\u529b\u306b\u52a0\u3048\u3066\u3001\u3044\u3064\u30fb\u3069\u306e\u3088\u3046\u306b\u30c4\u30fc\u30eb\u3092\u4f7f\u7528\u3059\u308b\u306e\u304c\u826f\u3044\u304b\u3068\u3044\u3063\u305f\u30c4\u30fc\u30eb\u5229\u7528\u6027\u80fd\u306b\u3064\u3044\u3066\u3082\u3001RL\u306b\u3088\u3063\u3066\u6027\u80fd\u304c\u5411\u4e0a\u3057\u3066\u3044\u308b\u3053\u3068\u304c\u5831\u544a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<br \/>\u3053\u306e\u3088\u3046\u306bLLM\u306b\u5bfe\u3059\u308bRL\u306e\u9069\u7528\u306f\u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u304b\u3089LLM\u306e\u63a8\u8ad6\u80fd\u529b\u306e\u5411\u4e0a\u3001\u305d\u3057\u3066\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3057\u3066\u306e\u30c4\u30fc\u30eb\u5229\u7528\u6027\u80fd\u306e\u5411\u4e0a\u3078\u3068\u5e83\u304c\u308a\u3092\u898b\u305b\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u6b74\u53f2\u7684\u80cc\u666f\u3092\u8e0f\u307e\u3048\u305f\u4e0a\u3067\u672c\u984c\u3067\u3042\u308bAgentic RL\u306b\u3064\u3044\u3066\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n<h2 id=\"agentic-rl%E3%81%A8%E3%81%AF\" 
data-line=\"31\" class=\"code-line\">\n Agentic RL\u3068\u306f<\/h2>\n<p data-line=\"32\" class=\"code-line\">\u307e\u305a\u306f\u672c\u8ad6\u6587\u306b\u304a\u3051\u308bAgentic RL\u306e\u5b9a\u7fa9\u3092\u5f15\u7528\u3057\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--Xw4McdUz--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/d89de0fed2c57db6c5e6c3c0.png%3Fsha%3D8c48c5ba701935ffff4d3b752be2d8d4547e56cf\" class=\"md-img\" loading=\"lazy\"\/><\/p>\n<blockquote data-line=\"34\" class=\"code-line\">\n<p data-line=\"34\" class=\"code-line\">Agentic RL\u3068\u306f\u3001LLM\u3092\u3001\u5358\u767a\u306e\u51fa\u529b\u6574\u5408\u6027\u3084\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u6027\u80fd\u306e\u6700\u9069\u5316\u3092\u76ee\u7684\u3068\u3057\u305f\u9759\u7684\u306a\u6761\u4ef6\u4ed8\u304d\u751f\u6210\u30e2\u30c7\u30eb\u3068\u3057\u3066\u6271\u3046\u306e\u3067\u306f\u306a\u304f\u3001\u9010\u6b21\u7684\u306a\u610f\u601d\u6c7a\u5b9a\u30eb\u30fc\u30d7\u306e\u4e2d\u306b\u57cb\u3081\u8fbc\u307e\u308c\u305f\u300c\u5b66\u7fd2\u53ef\u80fd\u306a\u65b9\u7b56\u300d\u3068\u3057\u3066\u6349\u3048\u308b\u30d1\u30e9\u30c0\u30a4\u30e0\u3092\u6307\u3057\u307e\u3059\u3002\u3053\u306e\u67a0\u7d44\u307f\u3067\u306f\u3001RL\u306b\u3088\u3063\u3066\u30e2\u30c7\u30eb\u306b\u8a08\u753b\u7acb\u6848\u30fb\u63a8\u8ad6\u30fb\u30c4\u30fc\u30eb\u5229\u7528\u30fb\u8a18\u61b6\u4fdd\u6301\u30fb\u81ea\u5df1\u7701\u5bdf\u306a\u3069\u306e\u81ea\u5f8b\u7684\u306a\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u7684\u80fd\u529b\u3092\u4e0e\u3048\u308b\u3053\u3068\u3067\u3001\u90e8\u5206\u7684\u306b\u89b3\u6e2c\u53ef\u80fd\u3067\u52d5\u7684\u306a\u74b0\u5883\u306b\u304a\u3044\u3066\u3001\u9577\u671f\u7684\u306a\u8a8d\u77e5\u7684\u30fb\u5bfe\u8a71\u7684\u884c\u52d5\u304c\u81ea\u767a\u7684\u306b\u73fe\u308c\u308b\u3053\u3068\u3092\u53ef\u80fd\u306b\u3057\u307e\u3059\u3002<\/p>\n<\/blockquote>\n
<p data-line=\"36\" class=\"code-line\">\u3064\u307e\u308a\u3001Agentic RL\u3068\u306f<strong>LLM\u3092\u81ea\u5f8b\u7684\u306b\u884c\u52d5\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3057\u3066\u6349\u3048\u3001\u74b0\u5883\u3068\u5bfe\u8a71\u3057\u306a\u304c\u3089\u9577\u671f\u7684\u306a\u76ee\u6a19\u3092\u9054\u6210\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3057\u3066\u306e\u80fd\u529b\u3092RL\u3067\u5411\u4e0a\u3055\u305b\u308b\u67a0\u7d44\u307f<\/strong>\u3068\u3044\u3048\u307e\u3059\u3002<br \/>\u3082\u3046\u5c11\u3057\u7406\u89e3\u3092\u6df1\u3081\u308b\u305f\u3081\u306b\u5f93\u6765\u306ePBRFT\u3068\u306e\u9055\u3044\u3092\u898b\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n<h2 id=\"pbrft%E3%81%A8agentic-rl%E3%81%AE%E6%AF%94%E8%BC%83\" data-line=\"40\" class=\"code-line\">\n PBRFT\u3068Agentic RL\u306e\u6bd4\u8f03<\/h2>\n<p data-line=\"41\" class=\"code-line\">RL\u306f\u30de\u30eb\u30b3\u30d5\u6c7a\u5b9a\u904e\u7a0b\u3068\u3044\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u306b\u57fa\u3065\u3044\u3066\u5b9a\u5f0f\u5316\u3055\u308c\u308b\u305f\u3081\u3001\u305d\u306e\u89b3\u70b9\u3067\u5f93\u6765\u306eRL\u3068\u3057\u3066\u4f4d\u7f6e\u4ed8\u3051\u3089\u308c\u3066\u3044\u308bPBRFT\u3068Agentic RL\u306e\u4e21\u8005\u3092\u6bd4\u8f03\u3057\u305f\u3082\u306e\u304c\u4ee5\u4e0b\u306e\u8868\u3067\u3059\u3002<\/p>\n<p data-line=\"43\" class=\"code-line\"><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--AuVPWvc8--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/52e06a4f1920b2fb8207a7c9.png%3Fsha%3D9e1bb471b3d167840ecece5386e78c4c25cdfe56\" class=\"md-img\" loading=\"lazy\"\/><\/p>\n<h3 id=\"%E7%8A%B6%E6%85%8B-(state)\" data-line=\"46\" class=\"code-line\">\n \u72b6\u614b (State)<\/h3>\n<p data-line=\"47\" class=\"code-line\">\u5f93\u6765\u306ePBRFT\u3067\u306f\u30a8\u30d4\u30bd\u30fc\u30c9\u306e\u521d\u671f\u72b6\u614b<embed-katex><eq 
class=\"zenn-katex\">s_0<\/eq><\/embed-katex>\u304c\u30e6\u30fc\u30b6\u30fc\u30d7\u30ed\u30f3\u30d7\u30c81\u3064\u3060\u3051\u3067\u3001\u30e2\u30c7\u30eb\u306e\u5fdc\u7b54\u5f8c\u306b\u305f\u3060\u3061\u306b\u30a8\u30d4\u30bd\u30fc\u30c9\u7d42\u4e86\u3057\u307e\u3059(horizon T=1)\u3002 \u3053\u308c\u306b\u5bfe\u3057Agentic RL\u3067\u306f\u3001\u74b0\u5883\u5185\u306e\u6642\u9593\u30b9\u30c6\u30c3\u30d7t\u306b\u304a\u3051\u308b\u72b6\u614b<embed-katex><eq class=\"zenn-katex\">s_t<\/eq><\/embed-katex>\u304b\u3089\u89b3\u6e2c<embed-katex><eq class=\"zenn-katex\">o_t=O(s_t)<\/eq><\/embed-katex>\u304c\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u4e0e\u3048\u3089\u308c\u307e\u3059\u3002\u72b6\u614b\u304a\u3088\u3073\u89b3\u6e2c\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u306b\u5fdc\u3058\u3066\u9077\u79fb\u3057\u3001\u6642\u9593\u3068\u3068\u3082\u306b\u5909\u5316\u3057\u307e\u3059(horizon T &gt; 1)\u3002<br \/>\u4f8b\u3048\u3070Research\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5834\u5408\u3001Web\u691c\u7d22\u3092\u884c\u3044\u5916\u90e8\u304b\u3089\u5f97\u3089\u308c\u308b\u60c5\u5831\u304c\u89b3\u6e2c\u306b\u76f8\u5f53\u3057\u307e\u3059\u3002Agentic RL\u306b\u304a\u3044\u3066\u306f\u3001\u72b6\u614b\uff1d\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u3068\u8003\u3048\u3066\u3082\u826f\u3044\u304b\u3082\u3057\u308c\u307e\u305b\u3093\u3002<\/p>\n<h3 id=\"%E8%A1%8C%E5%8B%95-(action)\" data-line=\"50\" class=\"code-line\">\n \u884c\u52d5 (Action)<\/h3>\n<p data-line=\"51\" class=\"code-line\">\u5f93\u6765\u306ePBRFT\u306e\u884c\u52d5\u306f\u30c6\u30ad\u30b9\u30c8\u51fa\u529b\u306e\u307f\u3067\u3059\u3002\u4e00\u65b9\u3001Agentic RL\u3067\u306f\u884c\u52d5\u7a7a\u9593\u304c\u30c6\u30ad\u30b9\u30c8\u751f\u6210 (<embed-katex><eq class=\"zenn-katex\">A_{text}<\/eq><\/embed-katex>) \u3068\u74b0\u5883\u64cd\u4f5c (<embed-katex><eq class=\"zenn-katex\">A_{action}<\/eq><\/embed-katex>) \u306e\u4e8c\u7a2e\u985e\u306b\u62e1\u5f35\u3055\u308c\u307e\u3059\u3002<br 
\/>\u4f8b\u3048\u3070GUI\u3092\u64cd\u4f5c\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5834\u5408\u3001\u30c6\u30ad\u30b9\u30c8\u751f\u6210\u306f\u4eba\u9593\u3084\u4ed6\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3078\u306e\u30e1\u30c3\u30bb\u30fc\u30b8\u3001\u3042\u308b\u3044\u306f\u9023\u9396\u601d\u8003\uff08Chain-of-Thought\u3001CoT\uff09\u306e\u751f\u6210\u306b\u8a72\u5f53\u3057\u3001\u74b0\u5883\u64cd\u4f5c\u306f\u30af\u30ea\u30c3\u30af\u3084\u30b9\u30af\u30ed\u30fc\u30eb\u3001\u30d5\u30a9\u30fc\u30e0\u5165\u529b\u306a\u3069\u306eGUI\u64cd\u4f5c\u306b\u76f8\u5f53\u3057\u307e\u3059\u3002<\/p>\n<h3 id=\"%E9%81%B7%E7%A7%BB%E9%96%A2%E6%95%B0-(transition)\" data-line=\"54\" class=\"code-line\">\n \u9077\u79fb\u95a2\u6570 (Transition)<\/h3>\n<p data-line=\"55\" class=\"code-line\">\u5f93\u6765\u306ePBRFT\u3067\u306f1\u56de\u306e\u884c\u52d5(\u30c6\u30ad\u30b9\u30c8\u751f\u6210)\u3068\u540c\u6642\u306b\u30a8\u30d4\u30bd\u30fc\u30c9\u7d42\u4e86\u3068\u306a\u308b\u305f\u3081\u72b6\u614b\u9077\u79fb\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u4e00\u65b9\u3001Agentic RL\u3067\u306f\u78ba\u7387\u7684\u306a\u9077\u79fb\u95a2\u6570 <embed-katex><eq class=\"zenn-katex\">P(s_{t+1} \\mid s_t, a_t)<\/eq><\/embed-katex> \u306b\u5f93\u3063\u3066\u72b6\u614b\u304c\u5404\u30b9\u30c6\u30c3\u30d7\u3067\u5909\u5316\u3057\u307e\u3059\u3002\u4f8b\u3048\u3070\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u4eba\u9593\u306b\u5bfe\u3057\u3066\u8cea\u554f\u3092\u884c\u3046\u30a2\u30af\u30b7\u30e7\u30f3\u3092\u3068\u3063\u305f\u3068\u304d\u3001\u4eba\u9593\u306e\u56de\u7b54\u306f\u6c7a\u5b9a\u7684\u3067\u306f\u306a\u3044\u305f\u3081\u6b21\u306e\u72b6\u614b\u306f\u78ba\u7387\u7684\u306b\u5909\u5316\u3057\u307e\u3059\u3002<\/p>\n<h3 id=\"%E5%A0%B1%E9%85%AC-(reward)\" data-line=\"57\" class=\"code-line\">\n \u5831\u916c (Reward)<\/h3>\n<p data-line=\"58\" 
class=\"code-line\">\u5f93\u6765\u306ePBRFT\u3067\u306f1\u56de\u306e\u51fa\u529b\u306b\u5bfe\u3057\u3066\u826f\u3055\u3092\u8a55\u4fa1\u3059\u308b\u30b9\u30ab\u30e9\u30fc\u5831\u916c <embed-katex><eq class=\"zenn-katex\">r(a)<\/eq><\/embed-katex> \u304c\u4e0e\u3048\u3089\u308c\u308b\u306e\u307f\u3067\u3001\u4e2d\u9593\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u306f\u3042\u308a\u307e\u305b\u3093\u3002\u4e00\u65b9Agentic RL\u3067\u306f\u30bf\u30b9\u30af\u9054\u6210\u6642\u306e\u5831\u916c\u306b\u52a0\u3048\u3001\u4e2d\u9593\u30b9\u30c6\u30c3\u30d7\u3067\u306e\u90e8\u5206\u7684\u306a\u5831\u916c\u3092\u9069\u5b9c\u4e0e\u3048\u308b\u3053\u3068\u304c\u3067\u304d\u307e\u3059\u3002\u4f8b\u3048\u3070\u30b5\u30d6\u30b4\u30fc\u30eb\u306e\u9054\u6210\u3001\u30c4\u30fc\u30eb\u3092\u6b63\u3057\u304f\u4f7f\u7528\u3059\u308b\u3053\u3068\u3001\u5358\u4f53\u30c6\u30b9\u30c8\u306e\u30d1\u30b9\u3001\u6570\u5b66\u306e\u5b9a\u7406\u8a3c\u660e\u306e\u90e8\u5206\u9032\u5c55\u306a\u3069\u306b\u5bfe\u3057\u90e8\u5206\u5831\u916c\u3092\u4e0e\u3048\u308b\u3053\u3068\u3067\u3001\u9577\u3044\u30bf\u30b9\u30af\u3092\u9014\u4e2d\u7d4c\u904e\u3082\u542b\u3081\u5b66\u7fd2\u3067\u304d\u307e\u3059\u3002\u5831\u916c\u306f\u4eba\u9593\u3084AI\u306e\u30d5\u30a3\u30fc\u30c9\u30d0\u30c3\u30af\u30e2\u30c7\u30eb(\u5831\u916c\u30e2\u30c7\u30eb)\u306b\u3088\u308b\u8a55\u4fa1\u5024\u3060\u3051\u3067\u306a\u304f\u3001\u30eb\u30fc\u30eb\u30d9\u30fc\u30b9\u306b\u3088\u308b\u691c\u8a3c\u53ef\u80fd\u306a\u5831\u916c(Verifiable Rewards)\u3084\u30b7\u30df\u30e5\u30ec\u30fc\u30bf\u5185\u306e\u30b9\u30b3\u30a2\u306a\u3069\u591a\u69d8\u306a\u8a2d\u8a08\u304c\u53ef\u80fd\u3067\u3059\u3002<\/p>\n<h3 id=\"%E7%9B%AE%E7%9A%84%E9%96%A2%E6%95%B0-(objective)\" data-line=\"60\" class=\"code-line\">\n \u76ee\u7684\u95a2\u6570 (Objective)<\/h3>\n<p data-line=\"61\" class=\"code-line\">\u5f93\u6765\u306ePBRFT\u306e\u76ee\u7684\u95a2\u6570<embed-katex><eq 
class=\"zenn-katex\">J_{\\theta}<\/eq><\/embed-katex>\u306f\u5358\u4e00\u30b9\u30c6\u30c3\u30d7\u306e\u671f\u5f85\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u3053\u3068\u3067\u3057\u305f\u3002\u4e00\u65b9Agentic RL\u3067\u306f\u5272\u5f15\u7d2f\u7a4d\u5831\u916c<embed-katex><eq class=\"zenn-katex\">J_{agent} = E_{\\tau \\sim \\pi_\\theta}[ \\sum_{t=0}^{T-1} \\gamma^t R_{agent}(s_t,a_t) ]<\/eq><\/embed-katex>\u3092\u6700\u5927\u5316\u3059\u308b\u9577\u671f\u6700\u9069\u5316\u554f\u984c\u3068\u306a\u308a\u307e\u3059\u3002\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u5c06\u6765\u306e\u5229\u76ca\u3082\u8003\u616e\u3057\u305f\u6226\u7565\u3092\u5b66\u7fd2\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u3001\u3053\u308c\u306b\u3088\u308a\u77ed\u671f\u7684\u306b\u306f\u4e0d\u5229\u3067\u3082\u5c06\u6765\u7684\u306b\u6709\u5229\u306a\u884c\u52d5\u3092\u9078\u3076\u3053\u3068\u304c\u6c42\u3081\u3089\u308c\u307e\u3059\u3002<\/p>\n<p data-line=\"63\" class=\"code-line\">\u3069\u3061\u3089\u306e\u30a2\u30d7\u30ed\u30fc\u30c1\u3082LLM\u306e\u6027\u80fd\u3092\u5411\u4e0a\u3055\u305b\u308b\u305f\u3081\u306bRL\u3092\u6d3b\u7528\u3057\u307e\u3059\u304c\u3001\u305d\u306e\u6839\u5e95\u306b\u3042\u308b\u4eee\u5b9a\u3001\u30bf\u30b9\u30af\u69cb\u9020\u3001\u304a\u3088\u3073\u610f\u601d\u6c7a\u5b9a\u306e\u7c92\u5ea6\u306b\u304a\u3044\u3066\u6839\u672c\u7684\u306b\u7570\u306a\u308a\u307e\u3059\u3002\u4e0b\u306e\u56f3\u306fPBRFT\u304b\u3089Agentic RL\u3078\u306e\u5404\u8981\u7d20\u3067\u306e\u30d1\u30e9\u30c0\u30a4\u30e0\u30b7\u30d5\u30c8\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p data-line=\"65\" class=\"code-line\"><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--0quNIOMp--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/6c1d6c050f4fe2a29cb753cc.png%3Fsha%3D34c5986247649668c7d488b1acb17e4b4a9d9b64\" class=\"md-img\" loading=\"lazy\"\/><\/p>\n<h2 
id=\"%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88%E3%81%AE%E3%82%B3%E3%82%A2%E8%83%BD%E5%8A%9B%E3%81%A8rl%E3%81%AB%E3%82%88%E3%82%8B%E6%9C%80%E9%81%A9%E5%8C%96\" data-line=\"68\" class=\"code-line\">\n \u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u30b3\u30a2\u80fd\u529b\u3068RL\u306b\u3088\u308b\u6700\u9069\u5316<\/h2>\n<p data-line=\"69\" class=\"code-line\">Agentic RL\u306b\u304a\u3044\u3066\u9375\u3068\u306a\u308b\u306e\u306f\u3001LLM\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u3069\u306e\u3088\u3046\u306a\u80fd\u529b\u3092\u6301\u305f\u305b\u305d\u308c\u3089\u3092RL\u3067\u6700\u9069\u5316\u3059\u308b\u304b\u3068\u3044\u3046\u3053\u3068\u3067\u3059\u3002\u3053\u306e\u8ad6\u6587\u3067\u306f\u30b3\u30a2\u80fd\u529b\u3068\u3057\u3066\u4ee5\u4e0b\u306e6\u3064\u306e\u80fd\u529b\u304c\u6319\u3052\u3089\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u3053\u3067\u306f\u305d\u308c\u305e\u308c\u80fd\u529b\u3092\u9ad8\u3081\u308b\u305f\u3081\u306bRL\u304c\u3069\u306e\u3088\u3046\u306b\u6d3b\u7528\u3055\u308c\u3066\u3044\u308b\u304b\u3092\u7d39\u4ecb\u3057\u3066\u3044\u304d\u307e\u3059\u3002<\/p>\n<ul data-line=\"71\" class=\"code-line\">\n<li data-line=\"71\" class=\"code-line\">Reasoning\uff08\u63a8\u8ad6\uff09<\/li>\n<li data-line=\"72\" class=\"code-line\">Tool Use\uff08\u30c4\u30fc\u30eb\u4f7f\u7528\uff09<\/li>\n<li data-line=\"73\" class=\"code-line\">Memory\uff08\u8a18\u61b6\uff09<\/li>\n<li data-line=\"74\" class=\"code-line\">Planning\uff08\u8a08\u753b\uff09<\/li>\n<li data-line=\"75\" class=\"code-line\">Self-Improvement\uff08\u81ea\u5df1\u6539\u5584\uff09<\/li>\n<li data-line=\"76\" class=\"code-line\">Perception\uff08\u77e5\u899a\uff09<\/li>\n<\/ul>\n<p data-line=\"78\" class=\"code-line\"><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--orVJPsZe--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/04846afb2fe06788389eef43.png%3Fsha%3Def30a983e4f182f3060bfed160372773b6bb19c2\" class=\"md-img\" loading=\"lazy\"\/><\/p>\n<hr data-line=\"80\" class=\"code-line\"\/>\n<h3 id=\"%E6%8E%A8%E8%AB%96%EF%BC%88reasoning%EF%BC%89\" data-line=\"82\" class=\"code-line\">\n \u63a8\u8ad6\uff08Reasoning\uff09<\/h3>\n<p data-line=\"83\" class=\"code-line\">\u63a8\u8ad6\uff08Reasoning\uff09\u306f\u3001\u4e0e\u3048\u3089\u308c\u305f\u60c5\u5831\u304b\u3089\u8ad6\u7406\u7684\u306b\u7d50\u8ad6\u3092\u5c0e\u304f\u30d7\u30ed\u30bb\u30b9\u3067\u3059\u3002\u5f93\u6765\u306eLLM\u306b\u304a\u3044\u3066\u3082Chain-of-Thought(CoT)\u30d7\u30ed\u30f3\u30d7\u30c6\u30a3\u30f3\u30b0\u306a\u3069\u306e\u6280\u8853\u306b\u3088\u3063\u3066\u63a8\u8ad6\u3059\u308b\u80fd\u529b\u3092\u6301\u3061\u307e\u3059\u304c\u3001\u6700\u8fd1\u306fRL\u3092\u7528\u3044\u3066LLM\u306e\u63a8\u8ad6\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u308b\u7814\u7a76\u304c\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002\u305d\u306e\u6d41\u308c\u3092\u6c7a\u5b9a\u7684\u306b\u52a0\u901f\u3055\u305b\u305f\u306e\u304cDeepSeek-R1\u3067\u3059\u3002\u4fa1\u5024\u95a2\u6570\u30e2\u30c7\u30eb\u3092\u4e0d\u8981\u3068\u3059\u308bGRPO\u306e\u63a1\u7528\u3084\u3001\u4e00\u610f\u89e3\u30bf\u30b9\u30af\u306b\u5bfe\u3059\u308b\u30eb\u30fc\u30eb\u30d9\u30fc\u30b9\u5831\u916c\u306b\u3088\u308b\u52b9\u7387\u5316\u306a\u3069\u3001\u5177\u4f53\u7684\u306a\u30a2\u30d7\u30ed\u30fc\u30c1\u3092\u901a\u3058\u3066\u63a8\u8ad6\u5f37\u5316\u306e\u52b9\u679c\u3092\u5e83\u304f\u77e5\u3089\u3057\u3081\u307e\u3057\u305f\u3002\u305f\u3060\u3057\u5b9f\u88c5\u306f\u30af\u30ed\u30fc\u30ba\u30c9\u306a\u307e\u307e\u3060\u3063\u305f\u305f\u3081\u3001\u518d\u73fe\u6027\u306e\u3042\u308b\u6bd4\u8f03\u691c\u8a3c\u3084\u767a\u5c55\u7684\u306a\u7814\u7a76\u3092\u9032\u3081\u308b\u4e
0a\u3067\u306e\u30cf\u30fc\u30c9\u30eb\u304c\u6b8b\u3063\u3066\u3044\u307e\u3057\u305f\u3002\u305d\u3053\u3067\u767b\u5834\u3057\u305f\u306e\u304cDAPO\u3067\u3059\u3002DeepSeek-R1\u3068\u80a9\u3092\u4e26\u3079\u308b\u6027\u80fd\u3092\u534a\u5206\u306e\u5b66\u7fd2\u30b9\u30c6\u30c3\u30d7\u3067\u9054\u6210\u3057\u3064\u3064\u3001\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u30fb\u30b3\u30fc\u30c9\u30fb\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u4e00\u5f0f\u3092\u5b8c\u5168\u30aa\u30fc\u30d7\u30f3\u30bd\u30fc\u30b9\u5316\u3057\u3001\u63a8\u8ad6\u30e2\u30c7\u30eb\u306eRL\u7814\u7a76\u3092\u5b9f\u88c5\u30ec\u30d9\u30eb\u3067\u518d\u73fe\u30fb\u62e1\u5f35\u3067\u304d\u308b\u74b0\u5883\u3092\u63d0\u4f9b\u3057\u305f\u70b9\u304c\u5927\u304d\u306a\u8ca2\u732e\u3068\u8a00\u3048\u307e\u3059\u3002<\/p>\n<p data-line=\"85\" class=\"code-line\">\u63a8\u8ad6\u30e2\u30c7\u30eb\u306e\u7814\u7a76\u306f\u63a8\u8ad6\u80fd\u529b\u306e\u3055\u3089\u306a\u308b\u5411\u4e0a\u3068\u3044\u3046\u65b9\u5411\u6027\u306e\u307b\u304b\u306b\u3001\u8003\u3048\u3059\u304e\uff08overthinking\uff09\u306e\u554f\u984c\u3078\u306e\u5bfe\u51e6\u3082\u6c42\u3081\u3089\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u306f\u5fc5\u8981\u4ee5\u4e0a\u306b\u9577\u304f\u8003\u3048\u3059\u304e\u308b\u3053\u3068\u3067\u30e6\u30fc\u30b6\u3078\u306e\u5fdc\u7b54\u306b\u6642\u9593\u3092\u8981\u3057\u3066\u3057\u307e\u3046\u554f\u984c\u3084\u9577\u8003\u3059\u308b\u3053\u3068\u3067\u9006\u306b\u7cbe\u5ea6\u304c\u60aa\u5316\u3059\u308b\u554f\u984c\u306b\u3064\u306a\u304c\u308a\u307e\u3059\u3002<br \/>Qwen3\u306f\u8907\u96d1\u306a\u591a\u6bb5\u968e\u63a8\u8ad6\u3092\u884c\u3046\u305f\u3081\u306e\u300c\u601d\u8003\u30e2\u30fc\u30c9\uff08thinking mode\uff09\u300d\u3068\u3001\u8fc5\u901f\u306a\u5fdc\u7b54\u306e\u305f\u3081\u306e\u300c\u975e\u601d\u8003\u30e2\u30fc\u30c9\uff08non-thinking 
mode\uff09\u300d\u3092\u5358\u4e00\u306e\u30e2\u30c7\u30eb\u3067\u5b9f\u73fe\u3059\u308b\u305f\u3081\u306bRL\u3068\u6559\u5e2b\u3042\u308a\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0(SFT)\u3092\u7d44\u307f\u5408\u308f\u305b\u3066\u4ee5\u4e0b\u306e4stage\u5b66\u7fd2\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002\u9762\u767d\u3044\u306e\u306f\u3001thinking mode\u306e\u5b66\u7fd2\u306b\u3088\u3063\u3066thinking budget(\u601d\u8003\u4e88\u7b97)\u3068\u547c\u3070\u308c\u308b\u3001\u30e6\u30fc\u30b6\u30fc\u304c\u63a8\u8ad6\u306b\u5272\u308a\u5f53\u3066\u308b\u8a08\u7b97\u30ea\u30bd\u30fc\u30b9\u3092\u30c8\u30fc\u30af\u30f3\u6570\u3067\u6307\u5b9a\u3067\u304d\u308b\u4ed5\u7d44\u307f\u3092\u81ea\u7136\u306b\u7372\u5f97\u3057\u3066\u3044\u308b\u70b9\u3067\u3059\u3002<\/p>\n<ul data-line=\"88\" class=\"code-line\">\n<li data-line=\"88\" class=\"code-line\">stage1. Long-CoT Cold Start (SFT): \u57fa\u672c\u7684\u306a\u63a8\u8ad6\u30d1\u30bf\u30fc\u30f3\u3092\u30e2\u30c7\u30eb\u306bSFT\u3067\u5b66\u7fd2\u3055\u305b\u308b<\/li>\n<li data-line=\"89\" class=\"code-line\">stage2. Reasoning RL: \u9ad8\u5ea6\u3067\u8907\u96d1\u306a\u63a8\u8ad6\u30bf\u30b9\u30af\uff08\u6570\u5b66\u3084\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u306a\u3069\uff09\u306e\u6027\u80fd\u3092RL\u3067\u6539\u5584\u3059\u308b<\/li>\n<li data-line=\"90\" class=\"code-line\">stage3. Thinking Mode Fusion (SFT): \u30e6\u30fc\u30b6\u30fc\u304b\u3089\u306e \/think \u3084 \/no_think \u3068\u3044\u3063\u305f\u6307\u793a\u3078\u306e\u8ffd\u5f93\u3092SFT\u3067\u5b66\u7fd2\u3055\u305b\u308b<\/li>\n<li data-line=\"91\" class=\"code-line\">stage4. 
General RL: \u4e00\u822c\u7684\u306a\u30bf\u30b9\u30af\uff08\u6307\u793a\u8ffd\u5f93\u3001\u30d5\u30a9\u30fc\u30de\u30c3\u30c8\u9075\u5b88\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u80fd\u529b\u306a\u3069\uff09\u306b\u5bfe\u3057\u3066\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u597d\u307f\u306b\u5408\u3046\u3088\u3046\u306b\u30e2\u30c7\u30eb\u306e\u5fdc\u7b54\u3092\u8abf\u6574\u3059\u308b<\/li>\n<\/ul>\n<p data-line=\"93\" class=\"code-line\"><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--b2U9t9OG--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/7f4d9ba44f6aa5621189a979.png%3Fsha%3D04bb5a13b84948c708e6a480be349aa115d30c29\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Qwen3 Technical Report (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.09388\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.09388<\/a>)<\/em><\/p>\n<p data-line=\"96\" class=\"code-line\">\u307e\u305fstage2\u306eReasoning RL\u3067\u306f\u5b66\u7fd2\u3092\u5b89\u5b9a\u3055\u305b\u308b\u305f\u3081\u306b\u4ee5\u4e0b\u306e\u57fa\u6e96\u3092\u6e80\u305f\u3059\u3088\u3046\u306b\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u8a2d\u8a08\u3057\u3066\u3044\u307e\u3059\u3002\u7279\u306b2\u3064\u76ee\u30683\u3064\u76ee\u304b\u3089Reasoning RL\u306b\u304a\u3044\u3066\u96e3\u6613\u5ea6\u8a2d\u5b9a\u304c\u91cd\u8981\u305d\u3046\u306a\u5370\u8c61\u3092\u53d7\u3051\u307e\u3059\u3002<\/p>\n<ul data-line=\"97\" class=\"code-line\">\n<li data-line=\"97\" class=\"code-line\">\u30b3\u30fc\u30eb\u30c9\u30b9\u30bf\u30fc\u30c8\u6bb5\u968e\u3067\u4f7f\u7528\u3055\u308c\u3066\u3044\u306a\u3044\u3053\u3068<\/li>\n<li data-line=\"98\" class=\"code-line\">\u30b3\u30fc\u30eb\u30c9\u30b9\u30bf\u30fc\u30c8\u30e2\u30c7\u30eb\u306b\u3068\u3063\u3066\u5b66\u7fd2\u53ef\u80fd\u3067\u3042\u308b\u3053\u3068<\/li>\n<li data-line=\"99\" 
class=\"code-line\">\u53ef\u80fd\u306a\u9650\u308a\u6311\u6226\u7684\u3067\u3042\u308b\u3053\u3068<\/li>\n<li data-line=\"100\" class=\"code-line\">\u5e83\u7bc4\u306a\u30b5\u30d6\u30c9\u30e1\u30a4\u30f3\u3092\u30ab\u30d0\u30fc\u3057\u3066\u3044\u308b\u3053\u3068<\/li>\n<\/ul>\n<hr data-line=\"102\" class=\"code-line\"\/>\n<h3 id=\"%E3%83%84%E3%83%BC%E3%83%AB%E4%BD%BF%E7%94%A8%EF%BC%88tool-use%EF%BC%89\" data-line=\"104\" class=\"code-line\">\n \u30c4\u30fc\u30eb\u4f7f\u7528\uff08Tool Use\uff09<\/h3>\n<p data-line=\"106\" class=\"code-line\">\u30c4\u30fc\u30eb\u4f7f\u7528\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u5916\u90e8\u306e\u60c5\u5831\u6e90\u3084API\u3001\u8a08\u7b97\u8cc7\u6e90\u306a\u3069\u3092\u547c\u3073\u51fa\u3057\u3066\u6d3b\u7528\u3059\u308b\u80fd\u529b\u3067\u3059\u3002\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u3067\u306e\u60c5\u5831\u53d6\u5f97\u3084\u96fb\u5353\u30fb\u30b3\u30fc\u30c9\u5b9f\u884c\u3001\u4ed6\u306e\u30e2\u30c7\u30eb\u3078\u306e\u30af\u30a8\u30ea\u306a\u3069\u3001\u30bf\u30b9\u30af\u9054\u6210\u306b\u5fc5\u8981\u306a\u3042\u3089\u3086\u308b\u5916\u90e8\u30c4\u30fc\u30eb\u3068\u306e\u30a4\u30f3\u30bf\u30e9\u30af\u30b7\u30e7\u30f3\u3092\u542b\u307f\u307e\u3059\u3002RL\u306b\u3088\u308a\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f <strong>\u300c\u3069\u306e\u30bf\u30a4\u30df\u30f3\u30b0\u3067\u3001\u3069\u306e\u30c4\u30fc\u30eb\u3092\u3001\u3069\u3046\u4f7f\u3046\u304b\u300d<\/strong> \u3092\u8a66\u884c\u932f\u8aa4\u304b\u3089\u5b66\u3073\u53d6\u308c\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002\u767a\u5c55\u306e\u6d41\u308c\u306f\u5927\u304d\u304f3\u6bb5\u968e\u3042\u308a\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--jrczPh3O--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/fdaa441c67a681d284a906cc.png%3Fsha%3D62d9674cf0141bd9e008e8b73cab18a6cd7c0e9b\" alt=\"alt text\" class=\"md-img\" 
loading=\"lazy\"\/><\/p>\n<h4 id=\"react%E5%BD%A2%E5%BC%8F%E3%81%AE%E3%83%84%E3%83%BC%E3%83%AB%E5%88%A9%E7%94%A8\" data-line=\"108\" class=\"code-line\">\n ReAct\u5f62\u5f0f\u306e\u30c4\u30fc\u30eb\u5229\u7528<\/h4>\n<p data-line=\"109\" class=\"code-line\">\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u30c4\u30fc\u30eb\u5229\u7528\u306b\u3064\u3044\u3066\u3001\u521d\u671f\u306fReAct\u3068\u547c\u3070\u308c\u308b\u30d7\u30ed\u30f3\u30d7\u30c8\u30d9\u30fc\u30b9\u306e\u624b\u6cd5\u3084\u3001Toolformer\u3068\u547c\u3070\u308c\u308b\u30c4\u30fc\u30eb\u5229\u7528\u30d7\u30ed\u30bb\u30b9\u3092SFT\u3067\u6a21\u5023\u5b66\u7fd2\u3057\u30c4\u30fc\u30eb\u5229\u7528\u80fd\u529b\u3092\u7372\u5f97\u3059\u308b\u624b\u6cd5\u304c\u8a66\u307f\u3089\u308c\u307e\u3057\u305f\u3002\u3057\u304b\u3057\u3001\u6a21\u5023\u5b66\u7fd2\u306e\u5834\u5408\u6559\u308f\u3063\u3066\u3044\u306a\u3044\u672a\u77e5\u306e\u30c4\u30fc\u30eb\u3078\u306e\u6c4e\u5316\u306f\u96e3\u3057\u304f\u67d4\u8edf\u6027\u306b\u6b20\u3051\u307e\u3059\u3002\u307e\u305f\u3001\u30c4\u30fc\u30eb\u5229\u7528\u5c65\u6b74\u30c7\u30fc\u30bf\u3092\u7528\u610f\u3059\u308b\u30b3\u30b9\u30c8\u3082\u3042\u308b\u3053\u3068\u304b\u3089\u3001RL\u3092\u7528\u3044\u3066\u30a2\u30a6\u30c8\u30ab\u30e0\u30d9\u30fc\u30b9\u3067\u30c4\u30fc\u30eb\u5229\u7528\u6226\u7565\u3092\u5b66\u7fd2\u3059\u308b\u8a66\u307f\u304c\u59cb\u307e\u308a\u307e\u3057\u305f\u3002<\/p>\n<h4 id=\"%E3%83%84%E3%83%BC%E3%83%AB%E7%B5%B1%E5%90%88%E5%9E%8Brl-(tool-integrated-rl)\" data-line=\"111\" class=\"code-line\">\n \u30c4\u30fc\u30eb\u7d71\u5408\u578bRL (Tool-Integrated RL)<\/h4>\n<p data-line=\"112\" 
class=\"code-line\">\u6b21\u306e\u6bb5\u968e\u3067\u306f\u3001\u30c4\u30fc\u30eb\u4f7f\u7528\u3092LLM\u306e\u8a8d\u77e5\u30eb\u30fc\u30d7\u306b\u6df1\u304f\u7d44\u307f\u8fbc\u307f\u3001\u8907\u6570\u30bf\u30fc\u30f3\u306b\u308f\u305f\u3063\u3066\u30c4\u30fc\u30eb\u3092\u4f7f\u3044\u3053\u306a\u3059\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u30b7\u30b9\u30c6\u30e0\u304c\u767b\u5834\u3057\u307e\u3057\u305f\u3002\u3069\u306e\u5c40\u9762\u3067\u3069\u306e\u30c4\u30fc\u30eb\u3092\u547c\u3076\u304b\u3001\u5f97\u305f\u60c5\u5831\u3092\u3069\u3046\u6d3b\u7528\u3059\u308b\u304b\u3092\u5831\u916c\u306b\u57fa\u3065\u304dRL\u3067\u5b66\u7fd2\u3057\u307e\u3059\u3002<br \/>\u4f8b\u3048\u3070ReTool\u3067\u306f\u8907\u96d1\u306a\u6570\u5b66\u7684\u554f\u984c\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066DeepSeek-R1\u306e\u3088\u3046\u306b\u30c6\u30ad\u30b9\u30c8\u30d9\u30fc\u30b9\u306eRL\u3092\u884c\u3046\u306e\u3067\u306f\u306a\u304f\u3001Python\u306e\u30b3\u30fc\u30c9\u30a4\u30f3\u30bf\u30d7\u30ea\u30bf\u3092\u30c4\u30fc\u30eb\u3068\u3057\u3066\u6d3b\u7528\u3059\u308b\u80fd\u529b\u3092RL\u306b\u3088\u3063\u3066\u5b66\u7fd2\u3059\u308b\u3053\u3068\u3067\u6b63\u89e3\u7387\u3092\u5411\u4e0a\u3055\u305b\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u7814\u7a76\u3067\u306fSFT\u3067\u57fa\u672c\u7684\u306a\u30c4\u30fc\u30eb\u5229\u7528\u80fd\u529b\u3092\u5b66\u7fd2\u5f8c\u306bRL\u3067\u6700\u7d42\u7684\u306a\u56de\u7b54\u306b\u5bfe\u3059\u308b\u6b63\u89e3\u5831\u916c\u306b\u3088\u3063\u3066\u30c4\u30fc\u30eb\u5229\u7528\u6226\u7565\u3092\u5b66\u7fd2\u3057\u307e\u3057\u305f\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--aR5gjDSV--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/04b8ff15acacca7a534ddd96.png%3Fsha%3D74deb7e045c4a8040af02a5b9b9d169cf84711b2\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>ReTool: Reinforcement Learning for Strategic Tool Use in LLMs (<a 
target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2504.11536\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2504.11536<\/a>)<\/em><\/p>\n<p data-line=\"117\" class=\"code-line\">\u307b\u307c\u540c\u6642\u671f\u306b\u767a\u8868\u3055\u308c\u305fARTIST\u3082\u4f3c\u305f\u3088\u3046\u306a\u30a2\u30d7\u30ed\u30fc\u30c1\u3092\u3068\u3063\u3066\u3044\u307e\u3059\u304c\u3001ARTIST\u306f\u6570\u5b66\u7684\u30bf\u30b9\u30af\u3060\u3051\u3067\u306a\u304f\u3001BFCL v3\u3084\u03c4-bench\u3068\u3044\u3063\u305f\u30de\u30eb\u30c1\u30b9\u30c6\u30c3\u30d7\u306eFunction Calling\u304c\u5fc5\u8981\u3068\u306a\u308b\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u3067\u306e\u8a55\u4fa1\u3092\u884c\u3063\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u3089\u306e\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u63a8\u8ad6\u30fb\u30c4\u30fc\u30eb\u5229\u7528\u3092\u7e70\u308a\u8fd4\u3057\u884c\u3044\u306a\u304c\u3089\u30bf\u30b9\u30af\u306b\u5bfe\u3059\u308b\u6700\u7d42\u7684\u306a\u56de\u7b54\u3092\u751f\u6210\u3057\u3001\u6700\u7d42\u56de\u7b54\u306e\u6b63\u89e3\u5831\u916c\u306b\u52a0\u3048\u3066\u3001\u30c4\u30fc\u30eb\u547c\u3073\u51fa\u3057\u6210\u529f\u5831\u916c\u306b\u3088\u3063\u3066\u3001\u3044\u3064\u3069\u306e\u30c4\u30fc\u30eb\u3092\u4f7f\u3046\u306e\u304c\u826f\u3044\u304b\u3092RL\u3067\u5b66\u7fd2\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--uUud07Fk--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/2b0ece1bb298b8ed5b5a24b9.png%3Fsha%3D85ad56114028835a3c72836bb577414090d8a589\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Agentic Reasoning and Tool Integration for LLMs via Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.01441\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.01441<\/a>)<\/em><\/p>\n<p data-line=\"121\" 
class=\"code-line\">\u4e0a\u8a18\u3067\u793a\u3057\u305f\u3088\u3046\u306aRL\u3092\u5229\u7528\u3057\u305f\u30c4\u30fc\u30eb\u7d71\u5408\u578b\u63a8\u8ad6\u306f\u3001\u7814\u7a76\u9818\u57df\u3060\u3051\u3067\u306a\u304fChatGPT\u306eDeep Research\u3084OpenAI o3\u306e\u3088\u3046\u306a\u5546\u7528\u30b7\u30b9\u30c6\u30e0\u306e\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u306b\u3082\u63a1\u7528\u3055\u308c\u3066\u3044\u308b\u3068\u8a00\u308f\u308c\u3066\u3044\u307e\u3059\u3002(\u9069\u7528\u65b9\u6cd5\u306e\u8a73\u7d30\u306f\u4e0d\u660e)<\/p>\n<h4 id=\"%E9%95%B7%E6%9C%9F%E7%9A%84%E3%83%BB%E3%83%9E%E3%83%AB%E3%83%81%E3%82%B9%E3%83%86%E3%83%83%E3%83%97%E3%81%AE%E3%83%84%E3%83%BC%E3%83%AB%E4%BD%BF%E7%94%A8\" data-line=\"123\" class=\"code-line\">\n \u9577\u671f\u7684\u30fb\u30de\u30eb\u30c1\u30b9\u30c6\u30c3\u30d7\u306e\u30c4\u30fc\u30eb\u4f7f\u7528<\/h4>\n<p data-line=\"124\" class=\"code-line\">\u4eca\u5f8c\u306e\u7814\u7a76\u306e\u65b9\u5411\u6027\u3068\u3057\u3066\u3001\u9577\u671f\u7684\u306a\u30b9\u30c6\u30c3\u30d7\u3067\u306e\u30c4\u30fc\u30eb\u9023\u643a\u3084\u3001\u8907\u6570\u306e\u30c4\u30fc\u30eb\u306e\u7d44\u307f\u5408\u308f\u305b\u306b\u3088\u308b\u8907\u96d1\u306a\u30bf\u30b9\u30af\u89e3\u6c7a\u304c\u6319\u3052\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p data-line=\"126\" 
class=\"code-line\">DeepSeek\u304c\u767a\u8868\u3057\u305fGRPO\u306f\u3001\u6570\u5b66\u306e\u554f\u984c\u306e\u3088\u3046\u306a1\u554f1\u7b54\u578b\u306e\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u6709\u52b9\u306aRL\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u3059\u304c\u3001\u4e00\u9023\u306e\u884c\u52d5\u3059\u3079\u3066\u3092\u307e\u3068\u3081\u3066\u8a55\u4fa1\u3059\u308b\u305f\u3081\u3001\u30de\u30eb\u30c1\u30b9\u30c6\u30c3\u30d7\u306e\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u306f\u500b\u3005\u306e\u30b9\u30c6\u30c3\u30d7\u306e\u826f\u3057\u60aa\u3057\u3092\u5224\u65ad\u3059\u308b\u306e\u304c\u96e3\u3057\u3044\u3068\u3044\u3046\u8ab2\u984c\u304c\u3042\u308a\u307e\u3059\u3002<br \/>GiGPO\u3067\u306f\u3053\u306e\u554f\u984c\u306b\u5bfe\u51e6\u3059\u308b\u305f\u3081\u306b\u3001\u30a8\u30d4\u30bd\u30fc\u30c9\u30ec\u30d9\u30eb\u3068\u30b9\u30c6\u30c3\u30d7\u30ec\u30d9\u30eb\u306e2\u3064\u306e\u30b0\u30eb\u30fc\u30d7\u69cb\u9020\u3067advantage\uff08\u884c\u52d5\u306e\u826f\u3057\u60aa\u3057\u306e\u57fa\u6e96\uff09\u3092\u8a08\u7b97\u3059\u308bGroup-in-Group Policy Optimization\uff08GiGPO\uff09\u3068\u3044\u3046\u624b\u6cd5\u3092\u63a1\u7528\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--U-kP3nS3--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/a48e87f40cda63e497fcd313.png%3Fsha%3D7599349aded2ef9c5f6c50d105942a07f01ab371\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Group-in-Group Policy Optimization for LLM Agent Training (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.10978\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.10978<\/a>)<\/em><\/p>\n<hr data-line=\"131\" class=\"code-line\"\/>\n<h3 id=\"%E3%83%A1%E3%83%A2%E3%83%AA%EF%BC%88%E9%95%B7%E6%9C%9F%E3%83%BB%E7%9F%AD%E6%9C%9F%E8%A8%98%E6%86%B6%EF%BC%89\" data-line=\"133\" class=\"code-line\">\n 
\u30e1\u30e2\u30ea\uff08\u9577\u671f\u30fb\u77ed\u671f\u8a18\u61b6\uff09<\/h3>\n<p data-line=\"134\" class=\"code-line\">\u30e1\u30e2\u30ea\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u904e\u53bb\u306b\u5f97\u305f\u60c5\u5831\u3084\u7d4c\u9a13\u3092\u4fdd\u6301\u30fb\u518d\u5229\u7528\u3059\u308b\u80fd\u529b\u3067\u3059\u3002LLM\u81ea\u8eab\u306e\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u30a6\u30a3\u30f3\u30c9\u30a6\u306f\u6709\u9650\u3067\u3042\u308b\u305f\u3081\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3057\u3066\u9577\u671f\u9593\u6d3b\u52d5\u3059\u308b\u306b\u306f\u5916\u90e8\u8a18\u61b6\uff08\u30ca\u30ec\u30c3\u30b8\u30d9\u30fc\u30b9\u3084\u5bfe\u8a71\u5c65\u6b74\uff09\u3092\u6d3b\u7528\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3059\u3002\u3053\u306e\u8ab2\u984c\u306b\u5bfe\u3057\u3001\u5f93\u6765\u306fRetrieval-Augmented Generation\uff08RAG\uff09\u306b\u3088\u308b\u691c\u7d22\u30fb\u53c2\u7167\u3084\u3001\u4f1a\u8a71\u5c65\u6b74\u3092\u6bce\u56de\u30d7\u30ed\u30f3\u30d7\u30c8\u306b\u8a70\u3081\u8fbc\u3080\u30a6\u30a3\u30f3\u30c9\u30a6\u62e1\u5f35\u306a\u3069\u304c\u7528\u3044\u3089\u308c\u3066\u304d\u307e\u3057\u305f\u3002\u3057\u304b\u3057\u3001\u9759\u7684\u306a\u691c\u7d22\u6226\u7565\u3084\u624b\u52d5\u3067\u8a2d\u8a08\u3055\u308c\u305f\u30e1\u30e2\u30ea\u66f4\u65b0\u3067\u306f\u3001\u30bf\u30b9\u30af\u306b\u6700\u9069\u306a\u60c5\u5831\u691c\u7d22\u30fb\u5fd8\u5374\u304c\u3067\u304d\u306a\u3044\u5834\u5408\u304c\u3042\u308a\u307e\u3059\u3002Agentic RL\u3067\u306f\u3001\u3069\u306e\u60c5\u5831\u3092\u8a18\u61b6\u3057\u3001\u4f55\u3092\u601d\u3044\u51fa\u3059\u3079\u304d\u304b\u3092RL\u3067\u5b66\u7fd2\u3055\u305b\u307e\u3059\u3002<\/p>\n<h4 id=\"rag%E5%BD%A2%E5%BC%8F%E3%81%AE%E3%83%A1%E3%83%A2%E3%83%AA\" data-line=\"135\" class=\"code-line\">\n RAG\u5f62\u5f0f\u306e\u30e1\u30e2\u30ea<\/h4>\n<p data-line=\"136\" 
class=\"code-line\">RAG\u5f62\u5f0f\u306e\u691c\u7d22\u30e1\u30ab\u30cb\u30ba\u30e0\u3092RL\u3067\u6700\u9069\u5316\u3059\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3068\u3057\u3066\u3001Tan et al.\uff082025\uff09\u304c\u63d0\u6848\u3057\u305fReflective Memory Management\uff08RMM\uff09\u306b\u304a\u3051\u308b\u5f8c\u65b9\u30ea\u30d5\u30ec\u30af\u30b7\u30e7\u30f3\uff08Retrospective Reflection\uff09\u304c\u6319\u3052\u3089\u308c\u307e\u3059\u3002\u3053\u306e\u624b\u6cd5\u306f\u3001\u5f93\u6765\u306eRAG\u304c\u6301\u3064\u300c\u691c\u7d22\u65b9\u6cd5\u304c\u56fa\u5b9a\u7684\u3067\u3001\u5bfe\u8a71\u306e\u6587\u8108\u306b\u5fdc\u3058\u3066\u6700\u9069\u5316\u3055\u308c\u306a\u3044\u300d\u3068\u3044\u3046\u8ab2\u984c\u306b\u5bfe\u51e6\u3059\u308b\u3082\u306e\u3067\u3059\u3002\u624b\u9806\u3068\u3057\u3066\u306f\u4ee5\u4e0b\u306e\u3068\u304a\u308a\u3067\u3059\u3002<\/p>\n<ol data-line=\"137\" class=\"code-line\">\n<li data-line=\"137\" class=\"code-line\">Retriever\u306b\u3088\u3063\u3066\u691c\u7d22\u3055\u308c\u305f\u8a18\u61b6\u306e\u5019\u88dc\u3092Reranker\u304c\u7d5e\u308a\u8fbc\u3080<\/li>\n<li data-line=\"138\" class=\"code-line\">LLM\u304c\u305d\u306e\u8a18\u61b6\u3092\u7528\u3044\u3066\u5fdc\u7b54\u3092\u751f\u6210\u3059\u308b\u969b\u3001\u5b9f\u969b\u306b\u3069\u306e\u8a18\u61b6\u3092\u5f15\u7528\u3057\u305f\u304b\u3092\u81ea\u5df1\u8a55\u4fa1\u3059\u308b<\/li>\n<li data-line=\"139\" class=\"code-line\">\u5f15\u7528\u3055\u308c\u305f\u8a18\u61b6\u306b\u306f\u6b63\u306e\u5831\u916c\uff08+1\uff09\u3001\u3055\u308c\u306a\u304b\u3063\u305f\u8a18\u61b6\u306b\u306f\u8ca0\u306e\u5831\u916c\uff08-1\uff09\u3092\u4e0e\u3048\u3066Reranker\u306e\u30d1\u30e9\u30e1\u30fc\u30bf\u3092\u66f4\u65b0\u3059\u308b<\/li>\n<\/ol>\n<p data-line=\"141\" 
class=\"code-line\">\u3053\u306e\u4e00\u9023\u306e\u51e6\u7406\u3092\u30aa\u30f3\u30e9\u30a4\u30f3RL\u306b\u3088\u308a\u3001\u5bfe\u8a71\u3092\u901a\u3058\u3066\u300cLLM\u304c\u672c\u5f53\u306b\u5fc5\u8981\u3068\u3059\u308b\u8a18\u61b6\u300d\u3092\u3088\u308a\u7684\u78ba\u306b\u9078\u629e\u3067\u304d\u308b\u3088\u3046Reranker\u3092\u7d99\u7d9a\u7684\u306b\u5b66\u7fd2\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--G06kl7jE--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/fb449e3f27a873a9fc70af49.png%3Fsha%3D106bb7c2ccee9640851a36483f4c14cd93103901\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>In Prospect and Retrospect: Reflective Memory Management for Long-term Personalized Dialogue Agents (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2503.08026\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2503.08026<\/a>)<\/em><\/p>\n<p data-line=\"145\" class=\"code-line\">\u4e0a\u8a18\u306fRAG\u306e\u691c\u7d22\u30e1\u30ab\u30cb\u30ba\u30e0\u306e\u6539\u5584\u306bRL\u3092\u5229\u7528\u3057\u305f\u4f8b\u3067\u3057\u305f\u304c\u3001Memory-R1\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u5916\u90e8\u30e1\u30e2\u30ea\u3092\u7ba1\u7406\u3059\u308b\u305f\u3081\u306b\u3082RL\u3092\u5229\u7528\u3057\u307e\u3059\u3002Memory-R1\u306f\u300cMemory Manager\u300d\u3068\u300cAnswer Agent\u300d\u3068\u3044\u30462\u3064\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3092\u5c0e\u5165\u3057\u3066\u3044\u307e\u3059\u3002Memory Manager\u306f\u3001\u30e1\u30e2\u30ea\u306e\u30a8\u30f3\u30c8\u30ea\u3092\u300c\u8ffd\u52a0\uff08ADD\uff09\u300d\u300c\u66f4\u65b0\uff08UPDATE\uff09\u300d\u300c\u524a\u9664\uff08DELETE\uff09\u300d\u300c\u4f55\u3082\u3057\u306a\u3044\uff08NOOP\uff09\u300d\u3068\u3044\u3063\u305f\u30e1\u30e2\u30ea\u64cd\u4f5c\u3092\u5b66\u7fd2\u3057\u3001Answer 
Agent\u306f\u3001\u53d6\u5f97\u3057\u305f\u30e1\u30e2\u30ea\u306e\u4e2d\u304b\u3089\u6700\u3082\u95a2\u9023\u6027\u306e\u9ad8\u3044\u3082\u306e\u3092\u9078\u629e\u3057\u3066\u56de\u7b54\u3092\u751f\u6210\u3057\u307e\u3059\u3002\u4e21\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5b66\u7fd2\u306b\u306fRL\u304c\u7528\u3044\u3089\u308c\u3066\u3044\u307e\u3059\u304c\u7279\u306b\u6ce8\u76ee\u3059\u3079\u304d\u306fMemory Manager\u306e\u5b66\u7fd2\u65b9\u6cd5\u3067\u3059\u3002Memory Manager\u306f\u81ea\u8eab\u306e\u884c\u52d5\u305d\u306e\u3082\u306e\u306b\u5bfe\u3057\u3066\u306e\u5831\u916c\u306f\u4e0e\u3048\u3089\u308c\u305a\u3001Answer Agent\u304c\u6b63\u3057\u3044\u56de\u7b54\u3092\u751f\u6210\u3067\u304d\u305f\u304b\uff1f\u3068\u3044\u3046\u6700\u7d42\u7684\u306a\u30a2\u30a6\u30c8\u30ab\u30e0\u306b\u5bfe\u3057\u3066\u5831\u916c\u304c\u4e0e\u3048\u3089\u308c\u6700\u9069\u306a\u30e1\u30e2\u30ea\u64cd\u4f5c\u6226\u7565\u3092\u5b66\u7fd2\u3057\u307e\u3059\u3002\u81ea\u8eab\u306e\u884c\u52d5\u304c\u4ed6\u306e\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u306b\u5f71\u97ff\u3092\u4e0e\u3048\u3066\u305d\u306e\u7d50\u679c\u306b\u57fa\u3065\u304d\u5b66\u7fd2\u304c\u884c\u308f\u308c\u308b\u3068\u3044\u3046\u306e\u306fRL\u306a\u3089\u3067\u306f\u3067\u9762\u767d\u3044\u7814\u7a76\u3060\u306a\u3068\u601d\u3044\u307e\u3057\u305f\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--6d5CgMOX--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/57c0363d3177a4bd1799beb8.png%3Fsha%3Da8e66d3d5d89b07921bde72086f7cdc79ab5ae67\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Memory-R1: Enhancing Large Language Model Agents to Manage and Utilize Memories via Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2508.19828\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2508.19828<\/a>)<\/em><\/p>\n<h4 
id=\"token-level%E3%83%A1%E3%83%A2%E3%83%AA\" data-line=\"150\" class=\"code-line\">\n Token-level\u30e1\u30e2\u30ea<\/h4>\n<p data-line=\"151\" class=\"code-line\">\u3053\u3061\u3089\u306fRAG\u306e\u3088\u3046\u306b\u5916\u90e8\u30e1\u30e2\u30ea\u3092\u5229\u7528\u305b\u305a\u3001LLM\u81ea\u8eab\u304c\u5b66\u7fd2\u53ef\u80fd\u306a\u30e1\u30e2\u30ea\u3092\u5099\u3048\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3067\u3059\u3002<br \/>MemAgent\u306fLLM\u304c\u975e\u5e38\u306b\u9577\u3044\u30c6\u30ad\u30b9\u30c8\uff08\u6570\u767e\u4e07\u30c8\u30fc\u30af\u30f3\uff09\u3092\u6271\u3048\u308b\u3088\u3046\u306b\u3059\u308b\u3053\u3068\u3092\u76ee\u7684\u3068\u3057\u3066\u3044\u307e\u3059\u3002\u4eba\u9593\u304c\u9577\u3044\u6587\u7ae0\u3092\u8aad\u3080\u969b\u306b\u30e1\u30e2\u3092\u53d6\u308b\u3088\u3046\u306b\u3001MemAgent\u306f\u30c6\u30ad\u30b9\u30c8\u3092\u30c1\u30e3\u30f3\u30af\u306b\u5206\u5272\u3057\u3066\u9806\u306b\u8aad\u307f\u8fbc\u307f\u3001\u56fa\u5b9a\u9577\u306e\u300c\u30e1\u30e2\u30ea\u300d\u306b\u5fc5\u8981\u306a\u60c5\u5831\u3092\u66f8\u304d\u8fbc\u307f\u306a\u304c\u3089\u5185\u5bb9\u3092\u7406\u89e3\u3057\u3066\u3044\u304d\u307e\u3059\u3002\u3053\u306e\u30e1\u30e2\u30ea\u7ba1\u7406(\u9650\u3089\u308c\u305f\u30b3\u30f3\u30c6\u30ad\u30b9\u30c8\u9577\u306b\u4f55\u3092\u8a18\u61b6\u3055\u305b\u308b\u304b)\u3092\u6700\u7d42\u7684\u306a\u30bf\u30b9\u30af\u306e\u6210\u529f\u5831\u916c\u306b\u57fa\u3065\u304dRL\u3067\u6700\u9069\u5316\u3057\u307e\u3059\u3002MemAgent\u306e\u4ed5\u7d44\u307f\u81ea\u4f53\u3082\u6709\u7528\u3067\u3059\u304c\u3001RL\u3042\u308aMemAgent\u304cRL\u306a\u3057MemAgent\u3088\u308a\u3082\u6027\u80fd\u304c\u5411\u4e0a\u3059\u308b\u7d50\u679c\u3068\u306a\u3063\u3066\u304a\u308aRL\u306e\u6709\u7528\u6027\u304c\u793a\u3055\u308c\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--uHt9ORLM--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/bb6fedd65799c39655a2a646.png%3Fsha%3D72c356d813a2bbb64d831a82af55252c2c7c980b\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>MemAgent: Reshaping Long-Context LLM with Multi-Conv RL-based Memory Agent (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2507.02259\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2507.02259<\/a>)<\/em><\/p>\n<hr data-line=\"156\" class=\"code-line\"\/>\n<h3 id=\"%E3%83%97%E3%83%A9%E3%83%B3%E3%83%8B%E3%83%B3%E3%82%B0%EF%BC%88%E8%A8%88%E7%94%BB%EF%BC%89\" data-line=\"158\" class=\"code-line\">\n \u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\uff08\u8a08\u753b\uff09<\/h3>\n<p data-line=\"159\" class=\"code-line\">\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\u3068\u306f\u3001\u30b4\u30fc\u30eb\u3092\u9054\u6210\u3059\u308b\u305f\u3081\u306e\u884c\u52d5\u7cfb\u5217\u3092\u8a08\u753b\u3059\u308b\u80fd\u529b\u306e\u3053\u3068\u3067\u3059\u3002\u4eba\u9593\u306e\u554f\u984c\u89e3\u6c7a\u3067\u3082\u6838\u5fc3\u3068\u306a\u308b\u30b9\u30ad\u30eb\u3067\u3042\u308a\u3001LLM\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u3068\u3063\u3066\u3082 <strong>\u300c\u3044\u3064\u30fb\u4f55\u3092\u30fb\u3069\u306e\u9806\u5e8f\u3067\u884c\u3046\u304b\u300d<\/strong> 
\u3092\u6c7a\u5b9a\u3059\u308b\u91cd\u8981\u306a\u5f79\u5272\u3092\u679c\u305f\u3057\u307e\u3059\u3002\u521d\u671f\u306eLLM\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3067\u306f\u3001\u4e0e\u3048\u3089\u308c\u305f\u30bf\u30b9\u30af\u3092\u3044\u304d\u306a\u308a\u89e3\u7b54\u3055\u305b\u308b\u306e\u3067\u306f\u306a\u304f\u3001\u4f8b\u3048\u3070ReAct\u306e\u3088\u3046\u306bLLM\u81ea\u8eab\u306bCoT\u3068\u884c\u52d5\u5019\u88dc\u3092\u9010\u6b21\u751f\u6210\u3055\u305b\u308b\u30d7\u30ed\u30f3\u30d7\u30c8\u624b\u6cd5\u304c\u8a66\u307f\u3089\u308c\u307e\u3057\u305f\u3002\u3057\u304b\u3057\u30d7\u30ed\u30f3\u30d7\u30c8\u5de5\u592b\u3084Few-shot\u4f8b\u306b\u57fa\u3065\u304f\u3053\u308c\u3089\u306e\u9759\u7684\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\u3067\u306f\u3001\u65b0\u3057\u3044\u72b6\u6cc1\u3078\u306e\u9069\u5fdc\u3084\u8a66\u884c\u932f\u8aa4\u306b\u3088\u308b\u6226\u7565\u6539\u5584\u306f\u56f0\u96e3\u3067\u3057\u305f\u3002 RL\u306f\u3053\u306e\u554f\u984c\u306b\u5bfe\u3057\u3001\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\u6226\u7565\u3092\u7d4c\u9a13\u304b\u3089\u5b66\u7fd2\u3055\u305b\u308b\u3068\u3044\u3046\u30a2\u30d7\u30ed\u30fc\u30c1\u3092\u63d0\u4f9b\u3057\u307e\u3059\u3002<br \/>RAP\u3067\u306f\u30e2\u30f3\u30c6\u30ab\u30eb\u30ed\u6728\u63a2\u7d22\uff08Monte Carlo Tree 
Search\u3001MCTS\uff09\u306b\u3088\u308b\u63a2\u7d22\u3092\u901a\u3058\u3066CoT\u4ee5\u4e0a\u306e\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\u3092\u5b9f\u73fe\u3057\u3066\u3044\u307e\u3059\u3002\u5f93\u6765\u306eCoT\u306f\u76f4\u7dda\u7684\u306a\u601d\u8003\u904e\u7a0b\u3092\u751f\u6210\u3057\u3066\u3044\u307e\u3057\u305f\u304c\u3001RAP\u306fLLM\u3092\u4e16\u754c\u30e2\u30c7\u30eb\u3068\u3057\u3066\u6271\u3044\u3001MCTS\u3092\u7528\u3044\u3066\u30c4\u30ea\u30fc\u30d9\u30fc\u30b9\u306e\u601d\u8003\u904e\u7a0b\uff08\u72b6\u614b\uff09\u3092\u751f\u6210\u3057\u3001\u5831\u916c\u304c\u6700\u3082\u9ad8\u304f\u306a\u308b\u3088\u3046\u306a\u63a8\u8ad6\u904e\u7a0b\u306e\u30d1\u30b9\u3092\u5148\u8aad\u307f\u63a2\u7d22\u306b\u3088\u308a\u9078\u629e\u3059\u308b\u3053\u3068\u3067\u3001\u63a8\u8ad6\u6642\u9593\u306f\u5897\u3048\u308b\u3082\u306e\u306e\u3001\u3088\u308a\u5805\u7262\u306a\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\u3092\u5b9f\u73fe\u3057\u3066\u3044\u307e\u3059\u3002\u5404\u63a8\u8ad6\u30b9\u30c6\u30c3\u30d7\u306e\u5831\u916c\uff08\u8a55\u4fa1\u5024\uff09\u306b\u306f\u3001\u884c\u52d5\u306e\u5c24\u3082\u3089\u3057\u3055\u3001\u72b6\u614b\u306e\u4fe1\u983c\u5ea6\u3001\u81ea\u5df1\u8a55\u4fa1\u3001\u30b4\u30fc\u30eb\u307e\u3067\u306e\u8fd1\u3055\u306e\u3088\u3046\u306a\u8907\u6570\u306e\u6307\u6a19\u304c\u63a1\u7528\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u3061\u3089\u306f\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u884c\u308f\u306a\u3044\u305f\u3081RL\u3092\u7528\u3044\u3066\u3044\u308b\u308f\u3051\u3067\u306f\u3042\u308a\u307e\u305b\u3093\u304c\u3001\u8208\u5473\u6df1\u3044\u306e\u3067\u7d39\u4ecb\u3057\u3066\u304a\u304d\u307e\u3059\u3002<\/p>\n<p data-line=\"162\" class=\"code-line\"><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--C0-WyAb9--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/5a665bf2798b1c54658c351d.png%3Fsha%3Df58b353c88e6dddb3f1f5680ea5b8a56561f86be\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Reasoning with Language Model is Planning with World Model (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2305.14992\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2305.14992<\/a>)<\/em><\/p>\n<hr data-line=\"165\" class=\"code-line\"\/>\n<h3 id=\"%E8%87%AA%E5%B7%B1%E6%94%B9%E5%96%84%E3%83%BB%E8%87%AA%E5%B7%B1%E5%8F%8D%E7%9C%81%EF%BC%88self-improvement-%2F-reflection%EF%BC%89\" data-line=\"167\" class=\"code-line\">\n \u81ea\u5df1\u6539\u5584\u30fb\u81ea\u5df1\u53cd\u7701\uff08Self-Improvement \/ Reflection\uff09<\/h3>\n<p data-line=\"168\" class=\"code-line\">\u81ea\u5df1\u6539\u5584\uff08Self-Improvement\uff09\u3068\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u81ea\u8eab\u306e\u51fa\u529b\u3084\u884c\u52d5\u3092\u632f\u308a\u8fd4\u308a\u3001\u8aa4\u308a\u3092\u8a02\u6b63\u3057\u305f\u308a\u6226\u7565\u3092\u6d17\u7df4\u3057\u305f\u308a\u3059\u308b\u80fd\u529b\u3067\u3059\u3002LLM\u306f\u81ea\u5df1\u53cd\u7701\u3084\u81ea\u5df1\u691c\u8a3c\u306e\u30d7\u30ed\u30f3\u30d7\u30c8\u3092\u4e0e\u3048\u308b\u3053\u3068\u3067\u56de\u7b54\u7cbe\u5ea6\u3092\u4e0a\u3052\u308b\u3053\u3068\u3082\u53ef\u80fd\u3067\u3059\u304c\u3001Agentic RL\u3067\u306f\u3053\u308c\u3092\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u5185\u90e8\u30eb\u30fc\u30d7\u306b\u7d44\u307f\u8fbc\u307f\u3001\u5b66\u7fd2\u306b\u3088\u3063\u3066\u6700\u9069\u5316\u3057\u307e\u3059\u3002<\/p>\n<p data-line=\"170\" 
class=\"code-line\">KnowSelf\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u30bf\u30b9\u30af\u3092\u5b9f\u884c\u3059\u308b\u969b\u3001\u73fe\u5728\u306e\u72b6\u6cc1\u3092\u81ea\u3089\u632f\u308a\u8fd4\u308a\u300c\u3053\u306e\u30bf\u30b9\u30af\u306f\u7c21\u5358\u3060\u304b\u3089\u3059\u3050\u3067\u304d\u308b\uff08Fast thinking\uff09\u300d\u300c\u5c11\u3057\u96e3\u3057\u3044\u304b\u3089\u3001\u4e00\u5ea6\u7acb\u3061\u6b62\u307e\u3063\u3066\u8003\u3048\u76f4\u305d\u3046\uff08Slow thinking\uff09\u300d\u300c\u81ea\u5206\u306e\u80fd\u529b\u3067\u306f\u7121\u7406\u3060\u304b\u3089\u3001\u5916\u90e8\u306e\u77e5\u8b58\u3092\u4f7f\u304a\u3046\uff08Knowledgeable thinking\uff09\u300d\u3068\u3044\u3063\u305f\u3088\u3046\u306b\u3001\u81ea\u8eab\u306e\u72b6\u614b\u306b\u5fdc\u3058\u3066\u601d\u8003\u30d7\u30ed\u30bb\u30b9\u3084\u77e5\u8b58\u306e\u5229\u7528\u3092\u81ea\u5f8b\u7684\u306b\u5207\u308a\u66ff\u3048\u308b\u3053\u3068\u3092\u53ef\u80fd\u306b\u3057\u307e\u3059\u3002<br \/>\u5177\u4f53\u7684\u306b\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u751f\u6210\u3057\u305f\u884c\u52d5\u30923\u3064\u306e\u601d\u8003\u30d1\u30bf\u30fc\u30f3\u306b\u30eb\u30fc\u30eb\u30d9\u30fc\u30b9\u3067\u5206\u985e\u3057\u3066SFT\u3067\u5b66\u7fd2\u5f8c\u30012\u3064\u306e\u5fdc\u7b54\u30da\u30a2\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u7528\u610f\u3057\u3001DPO\u306b\u3088\u308b\u9078\u597d\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3092\u884c\u3044\u307e\u3059\u3002\u3053\u306e\u30d7\u30ed\u30bb\u30b9\u306b\u3088\u3063\u3066\u3001ALFWorld\u3068\u3044\u3046\u5bb6\u5ead\u5185\u3067\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u7269\u4f53\u3092\u64cd\u4f5c\u3059\u308b\u30bf\u30b9\u30af\u3084WebShop\u3068\u3044\u3046Web\u30b5\u30a4\u30c8\u3067\u6307\u793a\u3069\u304a\u308a\u5546\u54c1\u3092\u8cfc\u5165\u3059\u308b\u30bf\u30b9\u30af\u3067\u6027\u80fd\u5411\u4e0a\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\"
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--FKWgIN-_--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/2e6fd6313f34389eb9fc7a54.png%3Fsha%3D20a96ec6ab7456e9fb6994bc04af5190975da2ed\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Agentic Knowledgeable Self-awareness (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2504.03553\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2504.03553<\/a>)<\/em><\/p>\n<p data-line=\"175\" class=\"code-line\">\u81ea\u5df1\u53cd\u7701\u3068\u306f\u5c11\u3057\u65b9\u5411\u6027\u304c\u5909\u308f\u308a\u307e\u3059\u304c\u3001\u4eba\u9593\u306e\u4ecb\u5165\u306a\u3057\u3067\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u81ea\u8eab\u3067\u5b66\u7fd2\u3092\u884c\u3046\u81ea\u5df1\u6539\u5584\u306e\u7814\u7a76\u3082\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002<br \/>Absolute Zero\u306f\u4eba\u9593\u304c\u4f5c\u6210\u3057\u305f\u30bf\u30b9\u30af\u3084\u30e9\u30d9\u30eb\u3092\u4e00\u5207\u4f7f\u7528\u305b\u305aLLM\u304c\u81ea\u5f8b\u7684\u306b\u81ea\u5df1\u6539\u5584\u3092\u884c\u3046\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u3059\u3002\u3053\u306e\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u3067\u306fLLM\u304c\u554f\u984c\u306e\u63d0\u6848\u3092\u884c\u3046Proposer\u3068\u554f\u984c\u306e\u89e3\u6c7a\u3092\u884c\u3046Solver\u306e2\u3064\u306e\u5f79\u5272\u3092\u62c5\u3044\u307e\u3059\u3002Solver\u3067\u306fProposer\u304c\u751f\u6210\u3057\u305f\u554f\u984c\u306b\u6b63\u7b54\u3057\u305f\u5834\u5408\u306b\u306e\u307f\u5831\u916c1\u3092\u5f97\u307e\u3059\u3002\u4e00\u65b9\u3001Proposer\u306f<embed-katex><eq class=\"zenn-katex\">r_{proposer} = 
1-r_{solver}<\/eq><\/embed-katex>\u306e\u3088\u3046\u306bSolver\u306e\u5831\u916c\u3092\u5c0f\u3055\u304f\u3059\u308b\u3088\u3046\u306a\u554f\u984c\u3092\u63d0\u6848\u3059\u308b\u5834\u5408\u306b\u9ad8\u3044\u5831\u916c\u304c\u4e0e\u3048\u3089\u308c\u307e\u3059\u3002\u305f\u3060\u3057\u554f\u984c\u304c\u96e3\u3057\u3059\u304e\u305f\u308a\u7c21\u5358\u3059\u304e\u308b\u3068\u81ea\u5df1\u6539\u5584\u304c\u9032\u307e\u306a\u3044\u3053\u3068\u304b\u3089<embed-katex><eq class=\"zenn-katex\">r_{solver}<\/eq><\/embed-katex>\u304c0 or 1\u306e\u5834\u5408\u306fProposer\u306e\u5831\u916c\u30820\u306b\u306a\u308b\u3088\u3046\u306a\u5831\u916c\u8a2d\u8a08\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002\u306a\u3093\u3068\u306a\u304fGAN(Generative Adversarial Network)\u306b\u4f3c\u305f\u69cb\u9020\u3092\u611f\u3058\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--4YtlqPff--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/b18640a48464fe405637bcb2.png%3Fsha%3D23219c3be7b712e7a38efa37492f961ee166b6fb\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Absolute Zero: Reinforced Self-play Reasoning with Zero Data (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.03335\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.03335<\/a>)<\/em><\/p>\n<p data-line=\"180\" 
class=\"code-line\">TTRL\u306f\u6b63\u89e3\u30e9\u30d9\u30eb\u306e\u306a\u3044\u30c7\u30fc\u30bf\u3092\u4f7f\u3063\u3066\u63a8\u8ad6\u6642\uff08\u30c6\u30b9\u30c8\u6642\uff09\u306b\u81ea\u5df1\u9032\u5316\u306b\u3088\u308b\u6027\u80fd\u5411\u4e0a\u3092\u56f3\u308b\u3082\u306e\u3067\u3059\u3002\u5177\u4f53\u7684\u306b\u306f\u3001LLM\u81ea\u8eab\u304c\u751f\u6210\u3057\u305f\u8907\u6570\u306e\u56de\u7b54\u306e\u4e2d\u304b\u3089\u591a\u6570\u6c7a\u3067\u6700\u5927\u6295\u7968\u306e\u4e88\u6e2c\u3092\u6b63\u89e3\u3068\u307f\u306a\u3057\u3066\u64ec\u4f3c\u6b63\u89e3\u30c7\u30fc\u30bf\u3092\u4f5c\u6210\u3057\u3001\u64ec\u4f3c\u6b63\u89e3\u30e9\u30d9\u30eb\u3068\u4e88\u6e2c\u304c\u4e00\u81f4\u3057\u3066\u3044\u308b\u304b\u3092\u5831\u916c\u3068\u3057\u3066RL\u3092\u884c\u3046\u3053\u3068\u3067\u3001\u4eba\u9593\u306e\u30e9\u30d9\u30ea\u30f3\u30b0\u306a\u3057\u306b\u30e2\u30c7\u30eb\u306e\u63a8\u8ad6\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u3060\u3051\u898b\u308b\u3068\u3001\u5358\u306b\u9078\u3070\u308c\u3084\u3059\u3044\u56de\u7b54\u3092\u3088\u308a\u9078\u3070\u308c\u3084\u3059\u304f\u3059\u308b\uff08\u3064\u307e\u308a\u78ba\u7387\u5206\u5e03\u3092\u5c16\u3089\u305b\u308b\uff09\u3088\u3046\u306b\u30d5\u30a1\u30a4\u30f3\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3057\u3066\u3044\u308b\u3060\u3051\u306e\u3088\u3046\u306b\u3082\u601d\u3048\u307e\u3059\u304c\u3001\u5b9f\u9a13\u3067\u306f\u6570\u5b66\u306e\u3042\u308b\u7279\u5b9a\u306e\u30bf\u30b9\u30af\u3067TTRL\u3057\u305f\u30e2\u30c7\u30eb\u3067\u3001\u7570\u306a\u308b\u6570\u5b66\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u3082\u6027\u80fd\u304c\u5411\u4e0a\u3059\u308b\u3053\u3068\u304c\u793a\u3055\u308c\u3066\u304a\u308a\u3001\u6c4e\u5316\u6027\u80fd\u306e\u5411\u4e0a\u304c\u78ba\u8a8d\u3055\u308c\u305f\u3088\u3046\u3067\u3059\u3002<br \/><img decoding=\"async\"
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--1QnTw7Ld--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/5df262574f6d0bdf0aca7a41.png%3Fsha%3D10ee089cc06099a255dabe46b40436a93893f5a6\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>TTRL: Test-Time Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2504.16084\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2504.16084<\/a>)<\/em><\/p>\n<hr data-line=\"184\" class=\"code-line\"\/>\n<h3 id=\"%E7%9F%A5%E8%A6%9A%EF%BC%88perception%EF%BC%89\" data-line=\"186\" class=\"code-line\">\n \u77e5\u899a\uff08Perception\uff09<\/h3>\n<p data-line=\"187\" class=\"code-line\">\u77e5\u899a\uff08Perception\uff09\u306f\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u30c6\u30ad\u30b9\u30c8\u4ee5\u5916\u306e\u30e2\u30c0\u30ea\u30c6\u30a3\uff08\u753b\u50cf\u3001\u97f3\u58f0\u3001\u5b9f\u4e16\u754c\u306e\u30bb\u30f3\u30b5\u30c7\u30fc\u30bf\u306a\u3069\uff09\u3092\u7406\u89e3\u30fb\u8a8d\u8b58\u3059\u308b\u80fd\u529b\u3067\u3059\u3002LLM\u306e\u63a8\u8ad6\u3092\u5f37\u5316\u3059\u308bRL\u306e\u6210\u529f\u306b\u89e6\u767a\u3055\u308c\u3001\u3053\u308c\u3089\u306e\u6210\u679c\u3092\u30de\u30eb\u30c1\u30e2\u30fc\u30c0\u30eb\u5b66\u7fd2\u3078\u5fdc\u7528\u3059\u308b\u53d6\u308a\u7d44\u307f\u304c\u9032\u3081\u3089\u308c\u3066\u3044\u307e\u3059\u3002<\/p>\n<p data-line=\"189\" class=\"code-line\">Vision-R1\u306f\u3001\u753b\u50cf\u3068\u30c6\u30ad\u30b9\u30c8\u3092\u540c\u6642\u306b\u7406\u89e3\u3059\u308b\u30de\u30eb\u30c1\u30e2\u30fc\u30c0\u30eb\u5927\u898f\u6a21\u8a00\u8a9e\u30e2\u30c7\u30eb\uff08Multimodal Large Language 
Model\u3001MLLM\uff09\u3092\u7528\u3044\u3066\u3001\u7279\u306b\u6570\u5b66\u306e\u56f3\u5f62\u554f\u984c\u306e\u3088\u3046\u306a\u8907\u96d1\u306a\u8996\u899a\u7684\u63a8\u8ad6\u30bf\u30b9\u30af\u306b\u304a\u3044\u3066\u4eba\u9593\u306e\u3088\u3046\u306a\u6df1\u3044\u601d\u8003\u30d7\u30ed\u30bb\u30b9\u3092\u518d\u73fe\u3059\u308b\u3053\u3068\u3092\u76ee\u6307\u3057\u3066\u3044\u307e\u3059\u3002DeepSeek-R1\u306e\u3088\u3046\u306bRL\u3092\u7528\u3044\u3066\u6570\u5b66\u554f\u984c\u306b\u5bfe\u3059\u308b\u63a8\u8ad6\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3067\u3059\u304c\u3001\u5358\u7d14\u306bRL\u3092\u9069\u7528\u3059\u308b\u306e\u3067\u306f\u306a\u304f\u3001\u300cDeepSeek-R1\u306e\u6a21\u5023\u5b66\u7fd2\u300d\u3068\u300c\u6bb5\u968e\u7684\u601d\u8003\u6291\u5236\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u300d\u3068\u3044\u30462\u6bb5\u968e\u306e\u5b66\u7fd2\u3092\u7d44\u307f\u5408\u308f\u305b\u3066\u3044\u308b\u306e\u304c\u7279\u5fb4\u3067\u3059\u3002<\/p>\n<ul data-line=\"190\" class=\"code-line\">\n<li data-line=\"190\" class=\"code-line\">1\u6bb5\u968e\u76ee: MLLM\u3092\u7528\u3044\u3066\u8996\u899a\u60c5\u5831\u3092\u8a73\u7d30\u306a\u30c6\u30ad\u30b9\u30c8\u8a18\u8ff0\u306b\u5909\u63db\u3055\u305b\u308bModality Bridging\u3092\u5b9f\u65bd\u3057\u3001\u305d\u306e\u30c6\u30ad\u30b9\u30c8\u3092DeepSeek-R1\u306b\u6e21\u3059\u3053\u3068\u3067\u8a73\u7d30\u306aCoT\u3092\u51fa\u529b\u3055\u305b\u307e\u3059\u3002DeepSeek-R1\u306eCoT\u3092\u6b63\u89e3\u30e9\u30d9\u30eb\u3068\u3057\u3066MLLM\u3092\u6a21\u5023\u5b66\u7fd2\u3059\u308b\u3053\u3068\u3067\u3001\u8996\u899a\u60c5\u5831\u306b\u57fa\u3065\u304fCoT\u3092\u5b89\u5b9a\u7684\u306b\u751f\u6210\u3067\u304d\u308b\u3088\u3046\u306b\u3057\u307e\u3059\u3002<\/li>\n<li data-line=\"191\" class=\"code-line\">2\u6bb5\u968e\u76ee: 
1\u6bb5\u968e\u76ee\u7d42\u4e86\u6642\u70b9\u3067\u306fCoT\u304c\u9577\u304f\u306a\u308b\u3068\u6027\u80fd\u304c\u4f4e\u4e0b\u3059\u308b\u50be\u5411\u304c\u3042\u308b\u3053\u3068\u304b\u3089\u30012\u6bb5\u968e\u76ee\u3067\u306f\u601d\u8003\u306e\u9577\u3055\u3092\u5236\u9650\u3057\u6bb5\u968e\u7684\u306b\u5897\u3084\u3057\u306a\u304c\u3089\u3001RL\u3067\u8996\u899a\u60c5\u5831\u3092\u542b\u3081\u305f\u63a8\u8ad6\u80fd\u529b\u5411\u4e0a\u3092\u884c\u3046\u6bb5\u968e\u7684\u601d\u8003\u6291\u5236\u30c8\u30ec\u30fc\u30cb\u30f3\u30b0\u3092\u5b9f\u65bd\u3057\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--vx2lsr3Y--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/94468dd0f729fbb8fe818015.png%3Fsha%3D375d5dc29435226fd77ad3961f1cae49a46c83f3\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Vision-R1: Incentivizing Reasoning Capability in Multimodal Large Language Models (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2503.06749\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2503.06749<\/a>)<\/em>\n<\/li>\n<\/ul>\n<p data-line=\"195\" class=\"code-line\">OPENTHINKIMG\u306f\u3001\u8996\u899a\u30c4\u30fc\u30eb\u3092\u4f7f\u3063\u3066\u8996\u899a\u7684\u554f\u984c\u3092\u89e3\u304f\u305f\u3081\u306e\u5b66\u7fd2\u306bRL\u3092\u5229\u7528\u3057\u3066\u3044\u307e\u3059\u3002<br 
\/>\u5177\u4f53\u7684\u306b\u306f\u3001\u753b\u50cf\u3068\u30c6\u30ad\u30b9\u30c8\u3092VLM\u306b\u5165\u529b\u3057\u3001\u30b0\u30e9\u30d5\u306e\u6570\u5024\u3092\u8aad\u307f\u53d6\u308bOCR\u30c4\u30fc\u30eb\u3001\u753b\u50cf\u306e\u4e00\u90e8\u3092\u62e1\u5927\u3059\u308b\u30ba\u30fc\u30e0\u30c4\u30fc\u30eb\u3068\u3044\u3063\u305f\u8996\u899a\u7684\u306a\u30c4\u30fc\u30eb\u3092VLM\u304c\u64cd\u4f5c\u3057\u306a\u304c\u3089\u8996\u899a\u7684\u554f\u984c\u3092\u89e3\u304d\u307e\u3059\u3002\u30e2\u30c7\u30eb\u306f\u74b0\u5883\u5185\u3067\u30c4\u30fc\u30eb\u3092\u81ea\u7531\u306b\u4f7f\u3044\u3001\u30c4\u30fc\u30eb\u306e\u5229\u7528\u7d50\u679c\u3092\u8996\u899a\u60c5\u5831\u3068\u3057\u3066\u30a4\u30f3\u30d7\u30c3\u30c8\u3057\u306a\u304c\u3089\u3001\u6700\u7d42\u7684\u306a\u30bf\u30b9\u30af\u306e\u6b63\u89e3\u30fb\u4e0d\u6b63\u89e3\u3068\u3044\u3046\u5831\u916c\u3092\u6700\u5927\u5316\u3059\u308b\u3088\u3046\u306b\u65b9\u7b56\u3092\u66f4\u65b0\u3057\u3066\u3044\u304d\u307e\u3059\u3002\u7279\u306b\u3001\u30c4\u30fc\u30eb\u306e\u8996\u899a\u7684\u306a\u51fa\u529b\u3092\u30e2\u30c7\u30eb\u306e\u6b21\u306e\u5224\u65ad\u6750\u6599\u3068\u3057\u3066\u76f4\u63a5\u5229\u7528\u3059\u308b\u70b9\u304c\u91cd\u8981\u3067\u3001\u3053\u308c\u306b\u3088\u308a\u3001\u30e2\u30c7\u30eb\u306f\u81ea\u3089\u306e\u884c\u52d5\u304c\u8996\u899a\u7684\u306b\u3069\u306e\u3088\u3046\u306a\u7d50\u679c\u3092\u3082\u305f\u3089\u3059\u304b\u3092\u7406\u89e3\u3057\u306a\u304c\u3089\u3001\u3088\u308a\u8ce2\u3044\u30c4\u30fc\u30eb\u9078\u629e\u304c\u3067\u304d\u308b\u3088\u3046\u306b\u306a\u308a\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--0XllqqA1--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/cbd77a869730b8ce69a906b8.png%3Fsha%3D2f2e613ec091d57a18cf0f8fd87e14bc5cb48f8b\" alt=\"alt text\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>OPENTHINKIMG: Learning to 
Think with Images via Visual Tool Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.08617\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.08617<\/a>)<\/em><\/p>\n<p data-line=\"200\" class=\"code-line\">Visual Planning\u3067\u306f\u3001\u4eba\u9593\u304c\u982d\u306e\u4e2d\u3067\u5730\u56f3\u3092\u601d\u3044\u6d6e\u304b\u3079\u305f\u308a\u3001\u5bb6\u5177\u306e\u914d\u7f6e\u3092\u30b7\u30df\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u3057\u305f\u308a\u3059\u308b\u3088\u3046\u306b\u3001\u30e2\u30c7\u30eb\u304c\u8a00\u8a9e\u3067\u306f\u306a\u304f\u753b\u50cf\u306b\u3088\u3063\u3066\u30bf\u30b9\u30af\u306e\u8a08\u753b\u3092\u7acb\u3066\u308b\u3053\u3068\u3092\u76ee\u6307\u3057\u3066\u3044\u307e\u3059\u3002\u30e2\u30c7\u30eb\u306f\u73fe\u5728\u306e\u753b\u50cf\u72b6\u614b\u304b\u3089\u6b21\u306e\u753b\u50cf\u72b6\u614b\u3092\u8907\u6570\u5019\u88dc\u751f\u6210\u3057\u3001\u524d\u306e\u72b6\u614b\u3068\u73fe\u5728\u306e\u72b6\u614b\u3068\u306e\u5dee\u5206\u304b\u3089\u884c\u52d5\uff08\u30ca\u30d3\u30b2\u30fc\u30b7\u30e7\u30f3\u306e\u5834\u5408\u3001\u4e0a\u4e0b\u5de6\u53f3\u306e\u79fb\u52d5\u65b9\u5411\u304c\u884c\u52d5\u306b\u8a72\u5f53\uff09\u3092\u30eb\u30fc\u30eb\u30d9\u30fc\u30b9\u3067\u7b97\u51fa\u3057\u307e\u3059\u3002\u3053\u306e\u30b9\u30c6\u30c3\u30d7\u3092\u7e70\u308a\u8fd4\u3057\u3001\u30b4\u30fc\u30eb\u306b\u8fd1\u3065\u304f\u5834\u5408\u306b\u5831\u916c\u3092\u4e0e\u3048\u308b\u3053\u3068\u3067\u3001\u30b4\u30fc\u30eb\u307e\u3067\u306e\u884c\u52d5\u8a08\u753b\u3092\u753b\u50cf\u30d9\u30fc\u30b9\u3067\u5b66\u7fd2\u3057\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--PrzFjVXo--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/1c606f22b80651331e44e846.png%3Fsha%3D87a6d66b4c23a3b348dd7961fb50d279b0bee970\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Visual 
Planning: Let\u2019s Think Only with Images (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.11409\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.11409<\/a>)<\/em><\/p>\n<h2 id=\"%E4%B8%BB%E3%81%AA%E5%BF%9C%E7%94%A8%E5%88%86%E9%87%8E%E3%81%A8%E4%BB%A3%E8%A1%A8%E7%9A%84%E3%81%AA%E6%89%8B%E6%B3%95\" data-line=\"205\" class=\"code-line\">\n \u4e3b\u306a\u5fdc\u7528\u5206\u91ce\u3068\u4ee3\u8868\u7684\u306a\u624b\u6cd5<\/h2>\n<p data-line=\"206\" class=\"code-line\">Agentic RL\u306f\u3055\u307e\u3056\u307e\u306a\u30bf\u30b9\u30af\u9818\u57df\u3067\u5fdc\u7528\u304c\u59cb\u307e\u3063\u3066\u304a\u308a\u3001\u3053\u306e\u8ad6\u6587\u3067\u306f\u4ee5\u4e0b\u306e\u5fdc\u7528\u5206\u91ce\u304c\u6319\u3052\u3089\u308c\u3066\u3044\u307e\u3059\u3002\u672c\u7bc0\u3067\u306f\u3001\u5404\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u9818\u57df\u306b\u304a\u3044\u3066RL\u304c\u3069\u306e\u3088\u3046\u306b\u6d3b\u7528\u3055\u308c\u3066\u3044\u308b\u304b\u3001\u4ee3\u8868\u7684\u306a\u624b\u6cd5\u30fb\u7814\u7a76\u4f8b\u3092\u7d39\u4ecb\u3057\u307e\u3059\u3002<\/p>\n<ul data-line=\"207\" class=\"code-line\">\n<li data-line=\"207\" class=\"code-line\">\u691c\u7d22\u30fb\u8abf\u67fb\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/li>\n<li data-line=\"208\" class=\"code-line\">\u30b3\u30fc\u30c9\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/li>\n<li data-line=\"209\" class=\"code-line\">\u6570\u5b66\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/li>\n<li data-line=\"210\" class=\"code-line\">GUI\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/li>\n<li data-line=\"211\" class=\"code-line\">\u30de\u30eb\u30c1\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/li>\n<li data-line=\"212\" class=\"code-line\">\u305d\u306e\u4ed6(Vision, \u8eab\u4f53\u6027)<\/li>\n<\/ul>\n<p data-line=\"214\" class=\"code-line\"><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--YJ5Crbfd--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/c71d57f78bb95be984023529.png%3Fsha%3D36d919798bd0c4b31b3a38d2754d3ca4dc708a33\" alt=\"alt text\" class=\"md-img\" loading=\"lazy\"\/><\/p>\n<h3 id=\"%E6%A4%9C%E7%B4%A2%E3%83%BB%E8%AA%BF%E6%9F%BB%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88\" data-line=\"216\" class=\"code-line\">\n \u691c\u7d22\u30fb\u8abf\u67fb\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/h3>\n<p data-line=\"217\" class=\"code-line\">\u691c\u7d22\u30fb\u8abf\u67fb\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u3001\u5916\u90e8\u306e\u77e5\u8b58\u30d9\u30fc\u30b9\u3084Web\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u3092\u6d3b\u7528\u3057\u3066\u3001\u30e6\u30fc\u30b6\u30fc\u306e\u8cea\u554f\u3084\u8abf\u67fb\u4f9d\u983c\u306b\u5bfe\u3057\u3066\u6b63\u78ba\u304b\u3064\u5305\u62ec\u7684\u306a\u56de\u7b54\u3092\u63d0\u4f9b\u3059\u308b\u3053\u3068\u3092\u76ee\u7684\u3068\u3057\u3066\u3044\u307e\u3059\u3002<br \/>LLM\u306b\u691c\u7d22\u80fd\u529b\u3092\u4ed8\u4e0e\u3059\u308b\u65b9\u6cd5\u3068\u3057\u3066RAG\u304c\u5e83\u304f\u7528\u3044\u3089\u308c\u3066\u3044\u307e\u3059\u304c\u3001\u691c\u7d22\u3068\u63a8\u8ad6\u3092\u4ea4\u4e92\u306b\u884c\u3046\u3088\u3046\u306a\u8907\u96d1\u306a\u30de\u30eb\u30c1\u30bf\u30fc\u30f3\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u306f\u5b66\u7fd2\u3092\u884c\u308f\u306a\u3044\u30d7\u30ed\u30f3\u30d7\u30c8\u30d9\u30fc\u30b9\u306e\u624b\u6cd5\u3067\u306f\u9650\u754c\u304c\u3042\u308b\u3053\u3068\u304b\u3089\u3001\u691c\u7d22\u30af\u30a8\u30ea\u306e\u751f\u6210\u3001\u691c\u7d22\u3001\u63a8\u8ad6\u3092end2end\u3067\u76f4\u63a5\u6700\u9069\u5316\u3059\u308b\u305f\u3081\u306bRL\u3092\u5229\u7528\u3059\u308b\u7814\u7a76\u304c\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p data-line=\"220\" 
class=\"code-line\">\u4e3b\u8981\u306a\u7814\u7a76\u306e\u4e00\u3064\u306f\u3001RAG\u306e\u57fa\u76e4\u3092\u6d3b\u7528\u3057\u3064\u3064Web\u691c\u7d22API\u3092\u5229\u7528\u3057\u3066\u3001\u30af\u30a8\u30ea\u751f\u6210\u3068\u591a\u6bb5\u968e\u306e\u63a8\u8ad6\u3092RL\u3067\u6700\u9069\u5316\u3059\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u3067\u3059\u3002<br \/>search-R1\u306f<code>&lt;think\/&gt;<\/code>(\u601d\u8003), <code>&lt;search\/&gt;<\/code>(\u691c\u7d22\u30af\u30a8\u30ea),<code>&lt;information\/&gt;<\/code>(\u691c\u7d22\u7d50\u679c), <code>&lt;answer\/&gt;<\/code>(\u56de\u7b54)\u3068\u3044\u30464\u3064\u306e\u7279\u6b8a\u30c8\u30fc\u30af\u30f3\u3092\u5c0e\u5165\u3057\u3001\u601d\u8003\u3068\u691c\u7d22\u3092\u8907\u6570\u30bf\u30fc\u30f3\u7e70\u308a\u8fd4\u3057\u6700\u7d42\u56de\u7b54\u3092\u3059\u308b\u30d7\u30ed\u30bb\u30b9\u3092PPO\u3084GRPO\u3068\u3044\u3063\u305fRL\u30a2\u30eb\u30b4\u30ea\u30ba\u30e0\u3067\u5b66\u7fd2\u3057\u307e\u3059\u3002\u601d\u8003\u3001\u691c\u7d22\u30af\u30a8\u30ea\u3001\u56de\u7b54\u3092\u305d\u308c\u305e\u308c\u884c\u52d5\u3068\u3057\u3066\u6271\u3044\u3001\u6700\u7d42\u7684\u306a\u56de\u7b54\u304c\u6b63\u89e3\u304b\u3069\u3046\u304b\u3092\u5831\u916c\u3068\u3057\u3066\u4e0e\u3048\u308b\u3053\u3068\u3067\u691c\u7d22\u3068\u63a8\u8ad6\u306e\u4e21\u65b9\u306e\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u3066\u3044\u307e\u3059\u3002\u307e\u305f<code>&lt;information\/&gt;<\/code>(\u691c\u7d22\u7d50\u679c)\u306b\u5bfe\u3057\u3066\u306f\u640d\u5931\u8a08\u7b97\u3092\u884c\u308f\u306a\u3044\u3088\u3046\u306b\u3059\u308b\u3053\u3068\u3067\u691c\u7d22\u7d50\u679c\u305d\u306e\u3082\u306e\u3092\u5b66\u7fd2\u3059\u308b\u3053\u3068\u3092\u907f\u3051\u3001\u7d50\u679c\u3068\u3057\u3066\u5b66\u7fd2\u304c\u5b89\u5b9a\u3057\u6027\u80fd\u5411\u4e0a\u306b\u8ca2\u732e\u3057\u3066\u3044\u308b\u3088\u3046\u3067\u3059\u3002<br \/><img decoding=\"async\"
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--Cned07Z6--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/1fd4d7bb2f847f6849e51a4c.png%3Fsha%3Dbdfa8d500949d6712f9778eb2ddb5caa72b2ccdf\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Search-R1: Training LLMs to Reason and Leverage Search Engines with Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2503.09516\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2503.09516<\/a>)<\/em><\/p>\n<p data-line=\"225\" class=\"code-line\">search-R1\u306e\u8ab2\u984c\u3068\u3057\u3066\u3001\u691c\u7d22\u30bf\u30fc\u30f3\u6570\u3092\u5927\u304d\u304f\u3059\u308b\u30681\u3064\u306e\u5b66\u7fd2\u306b\u3088\u308a\u591a\u304f\u306e\u6642\u9593\u3092\u8981\u3059\u308b\u305f\u3081\u3001\u5b66\u7fd2\u52b9\u7387\u306e\u89b3\u70b9\u304b\u3089\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u691c\u7d22\u30bf\u30fc\u30f3\u6570\u309210\u56de\u4ee5\u4e0b\u306b\u5236\u9650\u3059\u308b\u5fc5\u8981\u304c\u3042\u308a\u307e\u3057\u305f\u3002<br \/>ASearcher\u306fsearch-R1\u3092\u767a\u5c55\u3055\u305b\u305f\u3082\u306e\u3067\u3001\u8907\u6570\u306e\u691c\u7d22\u30bf\u30b9\u30af\u3092\u4e26\u884c\u51e6\u7406\u3059\u308b\u969b\u306b\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u3068\u30e2\u30c7\u30eb\u306e\u5b66\u7fd2\u3092\u5b8c\u5168\u306b\u5206\u96e2\u3057\u305f\u975e\u540c\u671f\u306a\u5b66\u7fd2\u30b7\u30b9\u30c6\u30e0\u3068\u3059\u308b\u3053\u3068\u3067\u5b66\u7fd2\u52b9\u7387\u3092\u6539\u5584\u3057\u7d50\u679c\u3068\u3057\u3066\u6700\u5927128\u30bf\u30fc\u30f3\u3068\u3044\u3046\u9577\u6642\u9593\u306e\u63a2\u7d22\u3092\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u5b66\u7fd2\u3055\u305b\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u308a\u307e\u3057\u305f\u3002<br \/><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--AQiefxc5--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/8a2102505c8094a1e798da4b.png%3Fsha%3Db16791ee473b474b77c329f72f0ffbaf96673ea7\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Beyond Ten Turns: Unlocking Long-Horizon Agentic Search with Large-Scale Asynchronous RL (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2508.07976\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2508.07976<\/a>)<\/em><\/p>\n<p data-line=\"230\" class=\"code-line\">\u4e0a\u8a18\u306e\u3088\u3046\u306b\u5916\u90e8\u306eWeb\u691c\u7d22API\u3092\u5229\u7528\u3059\u308b\u65b9\u6cd5\u306f\u3001Web\u4e0a\u306e\u30c9\u30ad\u30e5\u30e1\u30f3\u30c8\u54c1\u8cea\u304c\u30ce\u30a4\u30ba\u3068\u306a\u308a\u5b66\u7fd2\u304c\u4e0d\u5b89\u5b9a\u306b\u306a\u308a\u5f97\u308b\u3053\u3068\u3001\u5b66\u7fd2\u306b\u5fc5\u8981\u3068\u306a\u308bAPI\u5229\u7528\u30b3\u30b9\u30c8\u304c\u9ad8\u3044\u3068\u3044\u30462\u3064\u306e\u8ab2\u984c\u304c\u3042\u308a\u307e\u3059\u3002<br \/>ZeroSearch\u306f\u3001\u5916\u90e8\u306e\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u3092\u52b9\u679c\u7684\u306b\u5229\u7528\u3059\u308b\u80fd\u529b\u3092\u5b66\u7fd2\u3055\u305b\u308b\u70b9\u3067\u306f\u4e0a\u8a18\u306e\u624b\u6cd5\u3068\u540c\u69d8\u3067\u3059\u304c\u3001\u6700\u5927\u306e\u7279\u5fb4\u306f\u5b66\u7fd2\u4e2d\u306b\u5b9f\u969b\u306e\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\uff08Google\u306a\u3069\uff09\u3092\u4e00\u5207\u4f7f\u308f\u306a\u3044\u70b9\u306b\u3042\u308a\u307e\u3059\u3002search-R1\u306e\u56f3\u3068ZeroSearch\u306e\u56f3\u3092\u6bd4\u8f03\u3059\u308b\u3068\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u884c\u52d5\u53ce\u96c6\u3092\u884c\u3046Rollout 
Module\u306b\u304a\u3044\u3066SearchEngine\u304cSimulationLLM\u306b\u7f6e\u304d\u63db\u308f\u3063\u3066\u3044\u308b\u3053\u3068\u304c\u78ba\u8a8d\u3067\u304d\u307e\u3059\u3002\u3053\u306e\u3088\u3046\u306b\u5b66\u7fd2\u5bfe\u8c61\u3068\u306f\u5225\u306eLLM\u3092\u4f7f\u3063\u3066\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u306e\u6319\u52d5\u3092\u6a21\u5023\u3057\u3001\u305d\u306e\u6a21\u5023\u3055\u308c\u305f\u74b0\u5883\u306e\u4e2d\u3067LLM\u306e\u691c\u7d22\u3068\u63a8\u8ad6\u80fd\u529b\u3092\u5b66\u7fd2\u3057\u307e\u3059\u3002\u7d50\u679c\u3068\u3057\u3066ZeroSearch\u306f\u5b9f\u969b\u306e\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u3067\u5b66\u7fd2\u3055\u305b\u305f\u30e2\u30c7\u30eb\u3068\u540c\u7b49\u3042\u308b\u3044\u306f\u305d\u308c\u4ee5\u4e0a\u306e\u6027\u80fd\u3092\u5727\u5012\u7684\u306b\u4f4e\u3044\u30b3\u30b9\u30c8\u3067\u9054\u6210\u3067\u304d\u308b\u3053\u3068\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002LLM\u3067\u691c\u7d22\u30a8\u30f3\u30b8\u30f3\u3092\u6a21\u5023\u3067\u304d\u308b\u304b\u3068\u3044\u3046\u70b9\u306f\u3061\u3087\u3063\u3068\u7591\u554f\u306a\u306e\u3067\u3053\u308c\u3067\u3046\u307e\u304f\u3044\u304f\u306e\u304c\u4e0d\u601d\u8b70\u306a\u611f\u3058\u306f\u3057\u307e\u3059\u304c\u3001\u9762\u767d\u3044\u7814\u7a76\u3060\u306a\u3068\u601d\u3044\u307e\u3057\u305f\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--EiO_f7bo--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/24fd21a5d0d86e95e233f1c6.png%3Fsha%3D7a0ec3d6f4dd4116e1f094c76a5e4a978f990821\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>ZEROSEARCH: Incentivize the Search Capability of LLMs without Searching (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.04588\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.04588<\/a>)<\/em><\/p>\n<h3 
id=\"%E3%82%B3%E3%83%BC%E3%83%89%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88\" data-line=\"235\" class=\"code-line\">\n \u30b3\u30fc\u30c9\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/h3>\n<p data-line=\"236\" class=\"code-line\">\u30b3\u30fc\u30c9\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306fOpenAI\u306eCodex\u3084Anthropic\u306eClaude Code\u306e\u3088\u3046\u306a\u3001\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u30bf\u30b9\u30af\u306b\u7279\u5316\u3057\u305f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3067\u3059\u3002\u672c\u8ad6\u6587\u306b\u304a\u3044\u3066\u30b3\u30fc\u30c9\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u30bf\u30b9\u30af\u306f\u3001\u5358\u4e00\u30bf\u30fc\u30f3\u3067\u306e\u30b3\u30fc\u30c9\u751f\u6210\u3001\u8907\u6570\u30bf\u30fc\u30f3\u3067\u306e\u30b3\u30fc\u30c9\u6539\u826f\u3001\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u306e\u81ea\u52d5\u5316\u306e3\u3064\u306b\u5927\u5225\u3055\u308c\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u8a18\u4e8b\u3067\u306f\u3001\u7279\u306b\u96e3\u6613\u5ea6\u306e\u9ad8\u3044\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u3092\u81ea\u5f8b\u7684\u306b\u884c\u3046\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u7814\u7a76\u3092\u898b\u3066\u3044\u304d\u307e\u3059\u3002<br 
\/>\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u306f\u30b3\u30fc\u30c9\u306e\u8aad\u307f\u8fbc\u307f\u30fb\u4fee\u6b63\u30fb\u8ffd\u52a0\u306b\u52a0\u3048\u3066\u3001\u5916\u90e8\u30c4\u30fc\u30eb\uff08\u30b3\u30f3\u30d1\u30a4\u30e9\u3001\u30ea\u30f3\u30bf\u30fc\u3001\u30d0\u30fc\u30b8\u30e7\u30f3\u7ba1\u7406\u3001\u30b7\u30a7\u30eb\uff09\u306e\u5229\u7528\u3084\u30c6\u30b9\u30c8\u306b\u3088\u308b\u7d50\u679c\u306e\u691c\u8a3c\u306a\u3069\u3001\u8907\u96d1\u3067\u9577\u671f\u7684\u306a\u30b9\u30c6\u30c3\u30d7\u3092\u4f34\u3046\u30bf\u30b9\u30af\u3067\u3059\u3002\u3053\u306e\u3088\u3046\u306a\u30b7\u30ca\u30ea\u30aa\u3067\u306f\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u7684\u306a\u80fd\u529b\u304c\u5fc5\u8981\u3068\u306a\u308a\u3001RL\u3092\u7528\u3044\u3066\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u308b\u7814\u7a76\u304c\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002<\/p>\n<p data-line=\"239\" class=\"code-line\">SWE-RL\u3067\u306f\u3001GitHub\u306e460\u4e07\u4ef6\u306e\u516c\u958b\u30ec\u30dd\u30b8\u30c8\u30ea\u3092\u5bfe\u8c61\u306b\u3001\u30a4\u30b7\u30e5\u30fc\u3001\u30d7\u30eb\u30ea\u30af\u30a8\u30b9\u30c8\u3001\u30ec\u30d3\u30e5\u30fc\u30b3\u30e1\u30f3\u30c8\u3092\u6642\u7cfb\u5217\u3067\u53ce\u96c6\u3057\u3066RL\u7528\u306e\u5b66\u7fd2\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u69cb\u7bc9\u3057\u3066\u3044\u307e\u3059\u3002<br \/>\u672c\u7814\u7a76\u306e\u91cd\u8981\u306a\u70b9\u306f\u3001\u8907\u96d1\u306a\u30b7\u30df\u30e5\u30ec\u30fc\u30bf\u30fc\u3084\u5b9f\u884c\u74b0\u5883\u3092\u5fc5\u8981\u3068\u305b\u305a\u3001\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u304c\u751f\u6210\u3057\u305f\u4fee\u6b63\u30b3\u30fc\u30c9 <embed-katex><eq class=\"zenn-katex\">\\text{patch}_{\\text{pred}}<\/eq><\/embed-katex> \u3068\u4eba\u9593\u304c\u66f8\u3044\u305f\u6b63\u89e3\u306e\u30b3\u30fc\u30c9 <embed-katex><eq class=\"zenn-katex\">\\text{patch}_{\\text{gt}}<\/eq><\/embed-katex> 
\u3092\u3001Python\u306e<code>difflib.SequenceMatcher<\/code>\u3068\u3044\u3046\u6587\u5b57\u5217\u306e\u5dee\u5206\u304b\u3089\u985e\u4f3c\u5ea6\u3092\u8a08\u7b97\u3059\u308b\u30af\u30e9\u30b9\u3092\u4f7f\u3063\u3066\u5831\u916c\u3092\u8a08\u7b97\u3057\u3066\u3044\u308b\u70b9\u3067\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u3001\u81a8\u5927\u306a\u30c7\u30fc\u30bf\u306b\u5bfe\u3057\u3066\u8efd\u91cf\u304b\u3064\u30b9\u30b1\u30fc\u30e9\u30d6\u30eb\u306aRL\u304c\u53ef\u80fd\u3068\u306a\u3063\u3066\u3044\u307e\u3059\u3002<\/p>\n<section class=\"zenn-katex code-line\" data-line=\"242\"><eqn><embed-katex display-mode=\"1\"><br \/>\nR(\\tau) = \\begin{cases}<br \/>\n-1 &amp; \\text{if format is incorrect} \\\\<br \/>\n\\text{compare}(\\text{patch}_{\\text{pred}}, \\text{patch}_{\\text{gt}}) &amp; \\text{otherwise}<br \/>\n\\end{cases}<br \/>\n<\/embed-katex><\/eqn><\/section>\n<p data-line=\"249\" class=\"code-line\">\u307e\u305fSWE-RL\u306f\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u306e\u30d0\u30b0\u4fee\u6b63\u3068\u3044\u3046\u7279\u5b9a\u306e\u30bf\u30b9\u30af\u306e\u307f\u3067\u8a13\u7df4\u3055\u308c\u305f\u306b\u3082\u304b\u304b\u308f\u3089\u305a\u3001\u305d\u306e\u904e\u7a0b\u3067\u7372\u5f97\u3057\u305f\u63a8\u8ad6\u80fd\u529b\u304c\u3001\u6570\u5b66\u3001\u4e00\u822c\u7684\u306a\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u3001\u8a00\u8a9e\u7406\u89e3\u3068\u3044\u3063\u305f\u5168\u304f\u7570\u306a\u308b\u30c9\u30e1\u30a4\u30f3\u306e\u30bf\u30b9\u30af\u306b\u304a\u3044\u3066\u3082\u6027\u80fd\u5411\u4e0a\u3092\u3082\u305f\u3089\u3059\u3053\u3068\u3092\u793a\u3057\u307e\u3057\u305f\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--hWwiArny--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/1a812c620ae00d73e2489c7d.png%3Fsha%3D50f1950887c0f7e36955b3bee564500af82d03a1\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>SWE-RL: Advancing LLM Reasoning via 
Reinforcement Learning on Open Software Evolution (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2502.18449\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2502.18449<\/a>)<\/em><\/p>\n<p data-line=\"253\" class=\"code-line\">SWE-RL\u304c\u30b3\u30fc\u30c9\u5b9f\u884c\u74b0\u5883\u3092\u5fc5\u8981\u3068\u3057\u306a\u3044\u306e\u306b\u5bfe\u3057\u3001\u5b9f\u969b\u306b\u30b3\u30fc\u30c9\u3092\u5b9f\u884c\u3059\u308b\u74b0\u5883\u3092\u5229\u7528\u3057\u3066RL\u3092\u884c\u3046\u7814\u7a76\u3082\u3042\u308a\u307e\u3059\u3002<br \/>Qwen3 Coder\u306f\u30b3\u30fc\u30c9\u5b9f\u884c\u74b0\u5883\u3092\u7528\u610f\u3057\u30c6\u30b9\u30c8\u7d50\u679c\u3084\u30a8\u30e9\u30fc\u60c5\u5831\u306a\u3069\u306e\u3088\u3046\u306a\u691c\u8a3c\u53ef\u80fd\u306a\u5831\u916c\u3092\u5229\u7528\u3057\u3066\u30b3\u30fc\u30c7\u30a3\u30f3\u30b0\u80fd\u529b\u5411\u4e0a\u306e\u305f\u3081\u306eRL\u3092\u884c\u306a\u3063\u3066\u3044\u307e\u3059\u3002\u30b3\u30fc\u30c9\u5b9f\u884c\u74b0\u5883\u306b\u3064\u3044\u3066\u3082\u30a2\u30ea\u30d0\u30d0\u30af\u30e9\u30a6\u30c9\u3092\u6d3b\u7528\u30572\u4e07\u500b\u306e\u72ec\u7acb\u3057\u305f\u74b0\u5883\u3092\u4e26\u5217\u5b9f\u884c\u53ef\u80fd\u3068\u306a\u308b\u3088\u3046\u306b\u30b7\u30b9\u30c6\u30e0\u3092\u69cb\u7bc9\u3059\u308b\u3053\u3068\u3067\u5927\u898f\u6a21\u306a\u5f37\u5316\u5b66\u7fd2\u3092\u5b9f\u73fe\u3057\u3066\u3044\u307e\u3059\u3002\u7d50\u679c\u3068\u3057\u3066\u30bd\u30d5\u30c8\u30a6\u30a7\u30a2\u30a8\u30f3\u30b8\u30cb\u30a2\u30ea\u30f3\u30b0\u30bf\u30b9\u30af\u3092\u6271\u3046SWE-Bench Verified\u306b\u304a\u3044\u3066\u30aa\u30fc\u30d7\u30f3\u30bd\u30fc\u30b9\u30e2\u30c7\u30eb\u4e2d\u3067\u6700\u9ad8\u6c34\u6e96\u306e\u6027\u80fd\u3092\u9054\u6210\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" 
src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--vBS8__uR--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/8b6c68ce62f37b771cd83f82.png%3Fsha%3D8e227ca891df5ef95b3e2186070893c3fc0dc8c9\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Qwen3-Coder: Agentic Coding in the World (<a target=\"_blank\" href=\"https:\/\/qwen.ai\/blog?id=d927d7d2e59d059045ce758ded34f98c0186d2d7&amp;from=research.research-list\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/qwen.ai\/blog?id=d927d7d2e59d059045ce758ded34f98c0186d2d7&amp;from=research.research-list<\/a>)<\/em><\/p>\n<h3 id=\"%E6%95%B0%E5%AD%A6%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88\" data-line=\"258\" class=\"code-line\">\n \u6570\u5b66\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/h3>\n<p data-line=\"259\" class=\"code-line\">\u6570\u5b66\u7684\u63a8\u8ad6\u306f\u3001\u305d\u306e\u8a18\u53f7\u7684\u306a\u62bd\u8c61\u6027\u3001\u8ad6\u7406\u7684\u306a\u4e00\u8cab\u6027\u3001\u305d\u3057\u3066\u9577\u671f\u306b\u308f\u305f\u308b\u6f14\u7e79\u304c\u6c42\u3081\u3089\u308c\u308b\u3068\u3044\u3046\u6027\u8cea\u304b\u3089\u3001LLM\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u63a8\u8ad6\u80fd\u529b\u3092\u8a55\u4fa1\u3059\u308b\u4e0a\u3067\u6975\u3081\u3066\u91cd\u8981\u306a\u57fa\u6e96\u3068\u8003\u3048\u3089\u308c\u3066\u3044\u307e\u3059\u3002\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u30b3\u30a2\u80fd\u529b\u3067\u7d39\u4ecb\u3057\u305f\u7814\u7a76\u3067\u3082\u6570\u5b66\u7684\u306a\u30bf\u30b9\u30af\u306b\u5bfe\u3059\u308b\u6027\u80fd\u3092\u898b\u3066\u3044\u308b\u3082\u306e\u304c\u6570\u591a\u304f\u3042\u308a\u307e\u3059\u3002<br \/>rStar2-Agent\u306f\u3001\u96e3\u3057\u3044\u6570\u5b66\u7684\u30bf\u30b9\u30af\u306b\u5bfe\u3057\u3066\u3001\u63a8\u8ad6\u30c7\u30fc\u30bf\u306b\u3088\u308bSFT\u306a\u3057\u306e\u7d14\u7c8b\u306aAgentic
RL\u30a2\u30d7\u30ed\u30fc\u30c1\u306b\u3088\u3063\u3066\u300114B\u30d1\u30e9\u30e1\u30fc\u30bf\u3067671B\u306eDeepSeek-R1-Zero\u3092\u4e0a\u56de\u308b\u6027\u80fd\u3068\u5b66\u7fd2\u52b9\u7387\u3092\u9054\u6210\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u306e\u7814\u7a76\u3067\u306f\u3001\u30c4\u30fc\u30eb\u5229\u7528\u306e\u7bc0\u3067\u7d39\u4ecb\u3057\u305fReTool\u306e\u3088\u3046\u306bPython\u306e\u5b9f\u884c\u74b0\u5883\u3092\u30c4\u30fc\u30eb\u3068\u3057\u3066\u7528\u3044\u305f\u30c4\u30fc\u30eb\u7d71\u5408\u578b\u63a8\u8ad6\u3092\u884c\u3063\u3066\u3044\u308b\u70b9\u3068\u3001\u8907\u6570\u56de\u30ed\u30fc\u30eb\u30a2\u30a6\u30c8\u3067\u751f\u6210\u3057\u305f\u56de\u7b54\u5019\u88dc\u304b\u3089\u6700\u7d42\u7684\u306b\u6b63\u89e3\u3057\u3066\u304a\u308a\u63a8\u8ad6\u904e\u7a0b\u3067\u5229\u7528\u3057\u305f\u30c4\u30fc\u30eb\u547c\u3073\u51fa\u3057\u306e\u30a8\u30e9\u30fc\u304c\u5c11\u306a\u3044\u8cea\u306e\u9ad8\u3044\u6210\u529f\u4f8b\u3092\u512a\u5148\u7684\u306b\u30b5\u30f3\u30d7\u30ea\u30f3\u30b0\u3057\u3066\u5b66\u7fd2\u3059\u308bResample on Correct\uff08RoC\uff09\u3068\u547c\u3070\u308c\u308b\u30c6\u30af\u30cb\u30c3\u30af\u3092\u5c0e\u5165\u3057\u3066\u3044\u308b\u70b9\u304c\u7279\u5fb4\u3067\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--w7CH1Jxp--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/a29eedb578a641e3893b323a.png%3Fsha%3D5bd89117cdcb87f01123bd162b0d2a03545066a1\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>rStar2-Agent: Agentic Reasoning Technical Report (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2508.20722\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2508.20722<\/a>)<\/em><\/p>\n<p data-line=\"264\" 
class=\"code-line\">1Shot-RLVR\u3067\u306f\u6570\u5b66\u7684\u63a8\u8ad6\u80fd\u529b\u3092\u5411\u4e0a\u3055\u305b\u308b\u305f\u3081\u306b\u3001\u305f\u3063\u305f1\u3064\u306e\u8a13\u7df4\u4f8b\u3092\u7528\u3044\u305fRL\u304c\u6709\u52b9\u3067\u3042\u3063\u305f\u3053\u3068\u3092\u5b9f\u8a3c\u3057\u3066\u3044\u307e\u3059\u3002\u5177\u4f53\u7684\u306b\u306f\u3001\u30d9\u30fc\u30b9\u30e2\u30c7\u30eb\u306eQwen2.5-Math-1.5B\u306b\u5bfe\u3057\u30661\u3064\u306e\u8a13\u7df4\u4f8b\u3092\u9069\u7528\u3059\u308b\u3060\u3051\u3067\u3001MATH500\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u306736.0%\u304b\u308973.6%\u3078\u3068\u5927\u5e45\u306a\u6027\u80fd\u5411\u4e0a\u3092\u9054\u6210\u3057\u30016\u3064\u306e\u4e3b\u8981\u306a\u6570\u5b66\u7684\u63a8\u8ad6\u30d9\u30f3\u30c1\u30de\u30fc\u30af\u306e\u5e73\u5747\u306717.6%\u304b\u308935.7%\u306b\u6539\u5584\u3057\u305f\u3053\u3068\u3092\u793a\u3057\u3066\u3044\u307e\u3059\u3002\u3053\u308c\u306f\u3001\u6570\u5343\u306e\u4f8b\u3092\u542b\u3080\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u3092\u7528\u3044\u305f\u5834\u5408\u3068\u540c\u7b49\u307e\u305f\u306f\u305d\u308c\u4ee5\u4e0a\u306e\u6027\u80fd\u3067\u3042\u308b\u3053\u3068\u3092\u793a\u5506\u3057\u3066\u304a\u308a\u3001\u5c11\u91cf\u306e\u30c7\u30fc\u30bf\u3067LLM\u306e\u63a8\u8ad6\u80fd\u529b\u3092\u52b9\u679c\u7684\u306b\u6d3b\u6027\u5316\u3067\u304d\u308b\u3053\u3068\u3092\u5f37\u8abf\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--AgHeHplT--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/ab7107285a5cf26f0599232e.png%3Fsha%3Dc17401ca478f1e3717b4f006f9465801729203bc\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>Reinforcement Learning for Reasoning in Large Language Models with One Training Example (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2504.20571\" target=\"_blank\" rel=\"nofollow noopener 
noreferrer\">https:\/\/arxiv.org\/abs\/2504.20571<\/a>)<\/em><\/p>\n<h3 id=\"gui%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88\" data-line=\"268\" class=\"code-line\">\n GUI\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/h3>\n<p data-line=\"269\" class=\"code-line\">GUI\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306fWeb\u30d6\u30e9\u30a6\u30b6\u64cd\u4f5c\u3084\u30a2\u30d7\u30ea\u30b1\u30fc\u30b7\u30e7\u30f3\u64cd\u4f5c\u3068\u3044\u3063\u305f\u30bf\u30b9\u30af\u3092\u81ea\u5f8b\u7684\u306b\u884c\u3046\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3067\u3059\u3002\u7814\u7a76\u521d\u671f\u306fVision-Language Model\uff08VLM\uff09\u3092\u7528\u3044\u3066\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8\u3068\u30d7\u30ed\u30f3\u30d7\u30c8\u3092\u5165\u529b\u3068\u3057\u3066\u5358\u4e00\u30b9\u30c6\u30c3\u30d7\u306eGUI\u64cd\u4f5c\u3092\u884c\u3046\u65b9\u6cd5\u304c\u63d0\u6848\u3055\u308c\u307e\u3057\u305f\u3002\u305d\u306e\u5f8c\u3001\u4eba\u9593\u306eGUI\u64cd\u4f5c\u5b9f\u7e3e\u3092\u3082\u3068\u306b\u753b\u9762\uff08\u72b6\u614b\uff09\u3068GUI\u64cd\u4f5c\uff08\u884c\u52d5\uff09\u3068\u306e\u8ecc\u8de1\u30c7\u30fc\u30bf\u3092\u7528\u3044\u3066GUI\u64cd\u4f5c\u3092\u6a21\u5023\u5b66\u7fd2\u3059\u308b\u65b9\u6cd5\u304c\u8a66\u307f\u3089\u308c\u307e\u3057\u305f\u3002\u3057\u304b\u3057\u3001\u6a21\u5023\u5b66\u7fd2\u3092\u3059\u308b\u306b\u3042\u305f\u308a\u4eba\u9593\u306b\u3088\u308bGUI\u64cd\u4f5c\u8a18\u9332\u306e\u30c7\u30fc\u30bf\u30bb\u30c3\u30c8\u304c\u4e4f\u3057\u3044\u3068\u3044\u3046\u8ab2\u984c\u304c\u3042\u308a\u307e\u3057\u305f\u3002\u3053\u306e\u3088\u3046\u306a\u80cc\u666f\u304b\u3089\u3001\u30a2\u30a6\u30c8\u30ab\u30e0\u30d9\u30fc\u30b9\u3067\u5b66\u7fd2\u53ef\u80fd\u306aRL\u3092\u7528\u3044\u308b\u7814\u7a76\u304c\u9032\u5c55\u3057\u3066\u3044\u307e\u3059\u3002<br 
\/>UI-TARS\u306f\u3001\u4eba\u9593\u306e\u3088\u3046\u306bGUI\u306e\u30b9\u30af\u30ea\u30fc\u30f3\u30b7\u30e7\u30c3\u30c8\u753b\u50cf\u60c5\u5831\u306e\u307f\u304b\u3089OS\u3001Web\u3001\u30e2\u30d0\u30a4\u30eb\u30a2\u30d7\u30ea\u306a\u3069\u3001\u3042\u3089\u3086\u308bGUI\u74b0\u5883\u3067\u7d71\u4e00\u7684\u306b\u52d5\u4f5c\u3059\u308b\u9ad8\u3044\u6c4e\u7528\u6027\u3092\u5b9f\u73fe\u3057\u3066\u3044\u307e\u3059\u3002\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3092\u591a\u6570\u306e\u4eee\u60f3\u30de\u30b7\u30f3\u4e0a\u3067\u5b9f\u969b\u306b\u52d5\u4f5c\u3055\u305b\u3001\u65b0\u3057\u3044\u64cd\u4f5c\u30c7\u30fc\u30bf\uff08\u8ecc\u8de1\uff09\u3092\u81ea\u52d5\u3067\u53ce\u96c6\u3057\u3001\u53ce\u96c6\u3057\u305f\u30c7\u30fc\u30bf\u306e\u4e2d\u304b\u3089\u5931\u6557\u3057\u305f\u64cd\u4f5c\u3068\u305d\u308c\u3092\u4fee\u6b63\u3057\u305f\u6b63\u3057\u3044\u64cd\u4f5c\u306e\u30da\u30a2\u3092\u7279\u5b9a\u3057\u3001DPO\uff08Direct Preference Optimization\uff09\u3068\u3044\u3046\u624b\u6cd5\u3092\u7528\u3044\u3066\u300c\u5931\u6557\u304b\u3089\u5b66\u3076\u300d\u3088\u3046\u306b\u30e2\u30c7\u30eb\u3092\u30c1\u30e5\u30fc\u30cb\u30f3\u30b0\u3057\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--2uaYELXe--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/221c87e10b6debf828ba372e.png%3Fsha%3Dcad4e804ef344bbdc716640bcf2ce631a9e0d1e8\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>UI-TARS: Pioneering Automated GUI Interaction with Native Agents (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2501.12326\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2501.12326<\/a>)<\/em><\/p>\n<h3 id=\"%E8%BA%AB%E4%BD%93%E6%80%A7%E3%82%92%E6%8C%81%E3%81%A4%E3%82%A8%E3%83%BC%E3%82%B8%E3%82%A7%E3%83%B3%E3%83%88\" data-line=\"274\" class=\"code-line\">\n 
\u8eab\u4f53\u6027\u3092\u6301\u3064\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<\/h3>\n<p data-line=\"275\" class=\"code-line\">\u8eab\u4f53\u6027\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\uff08Embodied Agents\uff09\u306f\u3001\u30ed\u30dc\u30c3\u30c8\u306e\u3088\u3046\u306b\u7269\u7406\u7684\u306a\u74b0\u5883\u3067\u30de\u30eb\u30c1\u30e2\u30fc\u30c0\u30eb\u306a\u60c5\u5831\u3092\u3082\u3068\u306b\u7269\u7406\u7684\u306a\u884c\u52d5\u3092\u5b9f\u884c\u3059\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3067\u3059\u3002Vision-Language-Action\uff08VLA\uff09\u30e2\u30c7\u30eb\u3092\u7528\u3044\u3066\u6a21\u5023\u5b66\u7fd2\u306b\u3088\u308b\u4e8b\u524d\u5b66\u7fd2\u3092\u884c\u3044\u3001\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u30e2\u30c7\u30eb\u3092\u30a4\u30f3\u30bf\u30e9\u30af\u30c6\u30a3\u30d6\u306a\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u7d44\u307f\u8fbc\u307f\u3001\u74b0\u5883\u3068\u76f8\u4e92\u4f5c\u7528\u3055\u305b\u3066RL\u3059\u308b\u3053\u3068\u3067\u591a\u69d8\u306a\u5b9f\u4e16\u754c\u74b0\u5883\u306b\u304a\u3051\u308b\u30e2\u30c7\u30eb\u306e\u6c4e\u5316\u80fd\u529b\u3092\u9ad8\u3081\u308b\u30a2\u30d7\u30ed\u30fc\u30c1\u304c\u4e00\u822c\u7684\u306b\u53d6\u3089\u308c\u3066\u3044\u307e\u3059\u3002VLA\u30d5\u30ec\u30fc\u30e0\u30ef\u30fc\u30af\u306b\u304a\u3051\u308bRL\u306f\u3001\u8907\u96d1\u306a\u74b0\u5883\u3067\u306e\u7a7a\u9593\u7684\u63a8\u8ad6\u3068\u79fb\u52d5\u3092\u91cd\u8996\u3059\u308b\u30ca\u30d3\u30b2\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3068\u3001\u591a\u69d8\u3067\u52d5\u7684\u306a\u5236\u7d04\u4e0b\u3067\u7269\u7406\u30aa\u30d6\u30b8\u30a7\u30af\u30c8\u306e\u7cbe\u5bc6\u306a\u5236\u5fa1\u306b\u7126\u70b9\u3092\u5f53\u3066\u308b\u30de\u30cb\u30d4\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e2\u3064\u306b\u5927\u5225\u3055\u308c\u307e\u3059\u3002<\/p>\n<ul data-line=\"277\" class=\"code-line\">\n<li data-line=\"277\" class=\"code-line\">\n<p data-line=\"277\" 
class=\"code-line\">\u30ca\u30d3\u30b2\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<br \/>\u30ca\u30d3\u30b2\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306b\u3068\u3063\u3066\u3001\u8a08\u753b\uff08\u30d7\u30e9\u30f3\u30cb\u30f3\u30b0\uff09\u304c\u4e2d\u5fc3\u7684\u306a\u80fd\u529b\u3068\u306a\u308a\u307e\u3059\u3002VLA\u30e2\u30c7\u30eb\u306e\u5c06\u6765\u306e\u884c\u52d5\u7cfb\u5217\u3092\u4e88\u6e2c\u3057\u6700\u9069\u5316\u3059\u308b\u80fd\u529b\u3092\u5f37\u5316\u3059\u308b\u305f\u3081\u306bRL\u304c\u7528\u3044\u3089\u308c\u307e\u3059\u3002\u4e00\u822c\u7684\u306a\u6226\u7565\u306f\u3001\u4e8b\u524d\u5b66\u7fd2\u6e08\u307f\u306eVLA\u30e2\u30c7\u30eb\u3068\u540c\u3058\u3088\u3046\u306b1\u30b9\u30c6\u30c3\u30d7\u3054\u3068\u306e\u79fb\u52d5\u884c\u52d5\u306b\u5bfe\u3057\u3066\u5831\u916c\u3092\u4e0e\u3048\u3066\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u3092\u8a13\u7df4\u3059\u308b\u3053\u3068\u3067\u3059\u3002<br \/>VLN-R1\u306f\u3001RGB\u306e\u30d3\u30c7\u30aa\u6620\u50cf\u3092\u5165\u529b\u3068\u3057\u3066\u3001\u524d\u9032\u3059\u308b\u3001\u56de\u8ee2\u3059\u308b\u3068\u3044\u3063\u305f\u96e2\u6563\u884c\u52d5\u3092\u51fa\u529b\u3059\u308b\u30e2\u30c7\u30eb\u3092SFT\u3068RL\u3067\u5b66\u7fd2\u3057\u3066\u3044\u307e\u3059\u3002\u30e2\u30c7\u30eb\u306f\u4e00\u5ea6\u306b6\u30b9\u30c6\u30c3\u30d7\u5206\u306e\u884c\u52d5\u8ecc\u8de1\u3092\u51fa\u529b\u3057\u3001\u6642\u9593\u6e1b\u8870\u5831\u916c\u3068\u547c\u3070\u308c\u308b\u3001\u3088\u308a\u76f4\u8fd1\u306e\u884c\u52d5\u306b\u9ad8\u3044\u5831\u916c\u3092\u4e0e\u3048\u308b\u3068\u3044\u3046\u72ec\u81ea\u306e\u5831\u916c\u8a2d\u8a08\u3092\u5229\u7528\u3057\u3066\u3044\u307e\u3059\u3002<br \/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--ktg80dAm--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/331f475d8124cc1bd239b7f8.png%3Fsha%3D83b138a4fb7f61a31b91edd85c329a57b77b7fcc\" 
class=\"md-img\" loading=\"lazy\"\/><br \/><em>VLN-R1: Vision-Language Navigation via Reinforcement Fine-Tuning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2506.17221\" target=\"_blank\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2506.17221<\/a>)<\/em><\/p>\n<\/li>\n<li data-line=\"283\" class=\"code-line\">\n<p data-line=\"283\" class=\"code-line\">\u30de\u30cb\u30d4\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8<br \/>\u30de\u30cb\u30d4\u30e5\u30ec\u30fc\u30b7\u30e7\u30f3\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306f\u4e3b\u306b\u30ed\u30dc\u30c3\u30c8\u30a2\u30fc\u30e0\u304c\u95a2\u308f\u308b\u30bf\u30b9\u30af\u3067\u5229\u7528\u3055\u308c\u307e\u3059\u3002RL\u306fVLA\u30e2\u30c7\u30eb\u306e\u6307\u793a\u8ffd\u5f93\u80fd\u529b\u3068\u8ecc\u8de1\u4e88\u6e2c\u80fd\u529b\u3092\u5f37\u5316\u3059\u308b\u305f\u3081\u306b\u3001\u7279\u306b\u30bf\u30b9\u30af\u3084\u74b0\u5883\u3092\u8d8a\u3048\u305f\u6c4e\u5316\u6027\u80fd\u3092\u5411\u4e0a\u3055\u305b\u308b\u76ee\u7684\u3067\u7528\u3044\u3089\u308c\u307e\u3059\u3002<br \/>VLA-RL\u306f\u30ed\u30dc\u30c3\u30c8\u306e\u4e00\u9023\u306e\u52d5\u4f5c\u751f\u6210\u3092\u4eba\u9593\u3068AI\u306e\u5bfe\u8a71\u306e\u3088\u3046\u306b\u6349\u3048\u76f4\u3057\u3066\u3044\u307e\u3059\u3002\u5404\u30bf\u30a4\u30e0\u30b9\u30c6\u30c3\u30d7\u3067\u3001\u30ed\u30dc\u30c3\u30c8\u306f\u300c\u73fe\u5728\u306e\u8996\u899a\u60c5\u5831\uff08\u753b\u50cf\uff09\u300d\u3068\u300c\u4eba\u9593\u304b\u3089\u306e\u6307\u793a\uff08\u30c6\u30ad\u30b9\u30c8\uff09\u300d\u3092\u5165\u529b\u3068\u3057\u3066\u53d7\u3051\u53d6\u308a\u3001\u6b21\u306b\u884c\u3046\u3079\u304d\u884c\u52d5\u3092\u8a00\u8a9e\u30c8\u30fc\u30af\u30f3\u3068\u3057\u3066\u51fa\u529b\u3057\u307e\u3059\u3002\u3053\u308c\u306b\u3088\u308a\u5f37\u529b\u306a\u8a00\u8a9e\u30e2\u30c7\u30eb\u306e\u69cb\u9020\u3092RL\u306b\u76f4\u63a5\u5fdc\u7528\u3059\u308b\u3053\u3068\u304c\u53ef\u80fd\u306b\u306a\u3063\u3066\u3044\u307e\u3059\u3002<br 
\/><img decoding=\"async\" src=\"https:\/\/res.cloudinary.com\/zenn\/image\/fetch\/s--gHOi29fL--\/c_limit%2Cf_auto%2Cfl_progressive%2Cq_auto%2Cw_1200\/https:\/\/storage.googleapis.com\/zenn-user-upload\/deployed-images\/ba78a8c8908e381602027f7a.png%3Fsha%3D378a701ce7c4a4c8a9d6e40da17c405373ad2831\" class=\"md-img\" loading=\"lazy\"\/><br \/><em>VLA-RL: Towards Masterful and General Robotic Manipulation with Scalable Reinforcement Learning (<a target=\"_blank\" href=\"https:\/\/arxiv.org\/abs\/2505.18719\" rel=\"nofollow noopener noreferrer\">https:\/\/arxiv.org\/abs\/2505.18719<\/a>)<\/em><\/p>\n<\/li>\n<\/ul>\n<h2 id=\"%E3%81%8A%E3%82%8F%E3%82%8A%E3%81%AB\" data-line=\"289\" class=\"code-line\">\n \u304a\u308f\u308a\u306b<\/h2>\n<p data-line=\"290\" class=\"code-line\">Agentic RL\u306f2025\u5e74\u306b\u5165\u3063\u3066\u304b\u3089\u6025\u901f\u306b\u7814\u7a76\u304c\u9032\u5c55\u3057\u3066\u304a\u308a\u3001\u3053\u306e\u8a18\u4e8b\u3067\u7d39\u4ecb\u3057\u305f\u7814\u7a76\u3082\u591a\u304f\u306f2025\u5e74\u306b\u767a\u8868\u3055\u308c\u305f\u3082\u306e\u3067\u3059\u3002AI\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u306e\u66f4\u306a\u308b\u6027\u80fd\u5411\u4e0a\u306b\u4eca\u56de\u7d39\u4ecb\u3057\u305fAgentic RL\u304c\u4eca\u5f8c\u3069\u306e\u3088\u3046\u306b\u95a2\u308f\u3063\u3066\u3044\u304f\u304b\u3092\u975e\u5e38\u306b\u697d\u3057\u307f\u306b\u3057\u3066\u3044\u307e\u3059\u3002\u306a\u304b\u306a\u304b\u306e\u30dc\u30ea\u30e5\u30fc\u30e0\u3068\u306a\u3063\u3066\u3057\u307e\u3044\u307e\u3057\u305f\u304c\u3001\u6700\u5f8c\u307e\u3067\u304a\u8aad\u307f\u3044\u305f\u3060\u304d\u3042\u308a\u304c\u3068\u3046\u3054\u3056\u3044\u307e\u3057\u305f\u3002<\/p>\n<\/div>\n\n<br \/><a href=\"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025\">\u5143\u306e\u8a18\u4e8b\u3092\u78ba\u8a8d\u3059\u308b <\/a>\n","protected":false},"excerpt":{"rendered":"\u306f\u3058\u3081\u306b 
\u672c\u8a18\u4e8b\u3067\u306f\u3001LLM\u7814\u7a76\u3067\u6ce8\u76ee\u3092\u96c6\u3081\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u578b\u5f37\u5316\u5b66\u7fd2\uff08Agentic Reinforcement Learning\u3001Agentic RL\uff09\u306e\u30b5\u30fc\u30d9\u30a4\u8ad6\u6587\u300cThe Landscape of Agentic  [&hellip;]","protected":false},"author":1,"featured_media":8794,"comment_status":"closed","ping_status":"closed","sticky":false,"template":"","format":"standard","meta":{"_acf_changed":false,"footnotes":""},"categories":[2],"tags":[],"class_list":["post-8793","post","type-post","status-publish","format-standard","has-post-thumbnail","hentry","category-hatena-blog"],"acf":[],"yoast_head":"<!-- This site is optimized with the Yoast SEO plugin v27.6 - https:\/\/yoast.com\/product\/yoast-seo-wordpress\/ -->\n<title>LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3<\/title>\n<meta name=\"robots\" content=\"index, follow, max-snippet:-1, max-image-preview:large, max-video-preview:-1\" \/>\n<link rel=\"canonical\" href=\"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025\" \/>\n<meta property=\"og:locale\" content=\"ja_JP\" \/>\n<meta property=\"og:type\" content=\"article\" \/>\n<meta property=\"og:title\" content=\"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3\" \/>\n<meta property=\"og:description\" content=\"\u306f\u3058\u3081\u306b \u672c\u8a18\u4e8b\u3067\u306f\u3001LLM\u7814\u7a76\u3067\u6ce8\u76ee\u3092\u96c6\u3081\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u578b\u5f37\u5316\u5b66\u7fd2\uff08Agentic Reinforcement Learning\u3001Agentic RL\uff09\u306e\u30b5\u30fc\u30d9\u30a4\u8ad6\u6587\u300cThe Landscape of Agentic [&hellip;]\" \/>\n<meta property=\"og:url\" content=\"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025\" \/>\n<meta property=\"og:site_name\" 
content=\"\u30dd\u30b1\u30b3\u30f3\" \/>\n<meta property=\"article:published_time\" content=\"2025-10-09T16:19:33+00:00\" \/>\n<meta property=\"og:image\" content=\"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png\" \/>\n\t<meta property=\"og:image:width\" content=\"1200\" \/>\n\t<meta property=\"og:image:height\" content=\"630\" \/>\n\t<meta property=\"og:image:type\" content=\"image\/png\" \/>\n<meta name=\"author\" content=\"info@pokecon.jp\" \/>\n<meta name=\"twitter:card\" content=\"summary_large_image\" \/>\n<meta name=\"twitter:label1\" content=\"\u57f7\u7b46\u8005\" \/>\n\t<meta name=\"twitter:data1\" content=\"info@pokecon.jp\" \/>\n\t<meta name=\"twitter:label2\" content=\"\u63a8\u5b9a\u8aad\u307f\u53d6\u308a\u6642\u9593\" \/>\n\t<meta name=\"twitter:data2\" content=\"5\u5206\" \/>\n<script type=\"application\/ld+json\" class=\"yoast-schema-graph\">{\"@context\":\"https:\\\/\\\/schema.org\",\"@graph\":[{\"@type\":\"Article\",\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#article\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/8793\\\/\"},\"author\":{\"name\":\"info@pokecon.jp\",\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/#\\\/schema\\\/person\\\/16c9f07b1ba984d165d9aee259bda997\"},\"headline\":\"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic 
RL\u306e\u7814\u7a76\u7d39\u4ecb\",\"datePublished\":\"2025-10-09T16:19:33+00:00\",\"mainEntityOfPage\":{\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/8793\\\/\"},\"wordCount\":950,\"image\":{\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/wp-content\\\/uploads\\\/2025\\\/10\\\/1760026773_og-base-w1200-v2.png\",\"articleSection\":[\"\u306f\u3066\u306a\u30d6\u30ed\u30b0\"],\"inLanguage\":\"ja\"},{\"@type\":\"WebPage\",\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/8793\\\/\",\"url\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025\",\"name\":\"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3\",\"isPartOf\":{\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/#website\"},\"primaryImageOfPage\":{\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#primaryimage\"},\"image\":{\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#primaryimage\"},\"thumbnailUrl\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/wp-content\\\/uploads\\\/2025\\\/10\\\/1760026773_og-base-w1200-v2.png\",\"datePublished\":\"2025-10-09T16:19:33+00:00\",\"author\":{\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/#\\\/schema\\\/person\\\/16c9f07b1ba984d165d9aee259bda997\"},\"breadcrumb\":{\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#breadcrumb\"},\"inLanguage\":\"ja\",\"potentialAction\":[{\"@type\":\"ReadAction\",\"target\":[\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025\"]}]},{\"@type\":\"ImageObject\",\"inLanguage\":\"ja\",\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#primaryimage\",\"url\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/wp-content\\\/uploads\\\/2025\\\/10\\\/1760026773_og-base-w1200-v2.png\",\"contentUrl\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/wp-content\\\/uploads\\\
/2025\\\/10\\\/1760026773_og-base-w1200-v2.png\",\"width\":1200,\"height\":630},{\"@type\":\"BreadcrumbList\",\"@id\":\"https:\\\/\\\/zenn.dev\\\/kuto5046\\\/articles\\\/agentic_rl_2025#breadcrumb\",\"itemListElement\":[{\"@type\":\"ListItem\",\"position\":1,\"name\":\"\u30db\u30fc\u30e0\",\"item\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/\"},{\"@type\":\"ListItem\",\"position\":2,\"name\":\"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb\"}]},{\"@type\":\"WebSite\",\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/#website\",\"url\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/\",\"name\":\"\u30dd\u30b1\u30b3\u30f3\",\"description\":\"\",\"potentialAction\":[{\"@type\":\"SearchAction\",\"target\":{\"@type\":\"EntryPoint\",\"urlTemplate\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/?s={search_term_string}\"},\"query-input\":{\"@type\":\"PropertyValueSpecification\",\"valueRequired\":true,\"valueName\":\"search_term_string\"}}],\"inLanguage\":\"ja\"},{\"@type\":\"Person\",\"@id\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/#\\\/schema\\\/person\\\/16c9f07b1ba984d165d9aee259bda997\",\"name\":\"info@pokecon.jp\",\"image\":{\"@type\":\"ImageObject\",\"inLanguage\":\"ja\",\"@id\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g\",\"url\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g\",\"contentUrl\":\"https:\\\/\\\/secure.gravatar.com\\\/avatar\\\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g\",\"caption\":\"info@pokecon.jp\"},\"url\":\"https:\\\/\\\/pokecon.jp\\\/job\\\/author\\\/infopokecon-jp\\\/\"}]}<\/script>\n<!-- \/ Yoast SEO plugin. 
-->","yoast_head_json":{"title":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3","robots":{"index":"index","follow":"follow","max-snippet":"max-snippet:-1","max-image-preview":"max-image-preview:large","max-video-preview":"max-video-preview:-1"},"canonical":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025","og_locale":"ja_JP","og_type":"article","og_title":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3","og_description":"\u306f\u3058\u3081\u306b \u672c\u8a18\u4e8b\u3067\u306f\u3001LLM\u7814\u7a76\u3067\u6ce8\u76ee\u3092\u96c6\u3081\u308b\u30a8\u30fc\u30b8\u30a7\u30f3\u30c8\u578b\u5f37\u5316\u5b66\u7fd2\uff08Agentic Reinforcement Learning\u3001Agentic RL\uff09\u306e\u30b5\u30fc\u30d9\u30a4\u8ad6\u6587\u300cThe Landscape of Agentic [&hellip;]","og_url":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025","og_site_name":"\u30dd\u30b1\u30b3\u30f3","article_published_time":"2025-10-09T16:19:33+00:00","og_image":[{"width":1200,"height":630,"url":"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png","type":"image\/png"}],"author":"info@pokecon.jp","twitter_card":"summary_large_image","twitter_misc":{"\u57f7\u7b46\u8005":"info@pokecon.jp","\u63a8\u5b9a\u8aad\u307f\u53d6\u308a\u6642\u9593":"5\u5206"},"schema":{"@context":"https:\/\/schema.org","@graph":[{"@type":"Article","@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#article","isPartOf":{"@id":"https:\/\/pokecon.jp\/job\/8793\/"},"author":{"name":"info@pokecon.jp","@id":"https:\/\/pokecon.jp\/job\/#\/schema\/person\/16c9f07b1ba984d165d9aee259bda997"},"headline":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic 
RL\u306e\u7814\u7a76\u7d39\u4ecb","datePublished":"2025-10-09T16:19:33+00:00","mainEntityOfPage":{"@id":"https:\/\/pokecon.jp\/job\/8793\/"},"wordCount":950,"image":{"@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#primaryimage"},"thumbnailUrl":"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png","articleSection":["\u306f\u3066\u306a\u30d6\u30ed\u30b0"],"inLanguage":"ja"},{"@type":"WebPage","@id":"https:\/\/pokecon.jp\/job\/8793\/","url":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025","name":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb - \u30dd\u30b1\u30b3\u30f3","isPartOf":{"@id":"https:\/\/pokecon.jp\/job\/#website"},"primaryImageOfPage":{"@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#primaryimage"},"image":{"@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#primaryimage"},"thumbnailUrl":"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png","datePublished":"2025-10-09T16:19:33+00:00","author":{"@id":"https:\/\/pokecon.jp\/job\/#\/schema\/person\/16c9f07b1ba984d165d9aee259bda997"},"breadcrumb":{"@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#breadcrumb"},"inLanguage":"ja","potentialAction":[{"@type":"ReadAction","target":["https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025"]}]},{"@type":"ImageObject","inLanguage":"ja","@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#primaryimage","url":"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png","contentUrl":"https:\/\/pokecon.jp\/job\/wp-content\/uploads\/2025\/10\/1760026773_og-base-w1200-v2.png","width":1200,"height":630},{"@type":"BreadcrumbList","@id":"https:\/\/zenn.dev\/kuto5046\/articles\/agentic_rl_2025#breadcrumb","itemListElement":[{"@type":"ListItem","position":1,"name":"\u30db\u30fc\u30e0","item":"https:\/\/pokecon.jp\/job\/"},
{"@type":"ListItem","position":2,"name":"LLM\u00d7\u5f37\u5316\u5b66\u7fd2\u306e\u65b0\u3057\u3044\u30d1\u30e9\u30c0\u30a4\u30e0: Agentic RL\u306e\u7814\u7a76\u7d39\u4ecb"}]},{"@type":"WebSite","@id":"https:\/\/pokecon.jp\/job\/#website","url":"https:\/\/pokecon.jp\/job\/","name":"\u30dd\u30b1\u30b3\u30f3","description":"","potentialAction":[{"@type":"SearchAction","target":{"@type":"EntryPoint","urlTemplate":"https:\/\/pokecon.jp\/job\/?s={search_term_string}"},"query-input":{"@type":"PropertyValueSpecification","valueRequired":true,"valueName":"search_term_string"}}],"inLanguage":"ja"},{"@type":"Person","@id":"https:\/\/pokecon.jp\/job\/#\/schema\/person\/16c9f07b1ba984d165d9aee259bda997","name":"info@pokecon.jp","image":{"@type":"ImageObject","inLanguage":"ja","@id":"https:\/\/secure.gravatar.com\/avatar\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g","url":"https:\/\/secure.gravatar.com\/avatar\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g","contentUrl":"https:\/\/secure.gravatar.com\/avatar\/2b0549cd9f7907c092ca5fbb283baf72337f235726e4b46fa39ec0b701ac2fe2?s=96&d=wavatar&r=g","caption":"info@pokecon.jp"},"url":"https:\/\/pokecon.jp\/job\/author\/infopokecon-jp\/"}]}},"_links":{"self":[{"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/posts\/8793","targetHints":{"allow":["GET"]}}],"collection":[{"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/posts"}],"about":[{"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/types\/post"}],"author":[{"embeddable":true,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/users\/1"}],"replies":[{"embeddable":true,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/comments?post=8793"}],"version-history":[{"count":1,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/posts\/8793\/revisions"}],"predecessor-version":[{"id":8795,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/posts\/8793\/revisions\/8795"}],"wp:featuredmedia":[{"embeddable":true,
"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/media\/8794"}],"wp:attachment":[{"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/media?parent=8793"}],"wp:term":[{"taxonomy":"category","embeddable":true,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/categories?post=8793"},{"taxonomy":"post_tag","embeddable":true,"href":"https:\/\/pokecon.jp\/job\/wp-json\/wp\/v2\/tags?post=8793"}],"curies":[{"name":"wp","href":"https:\/\/api.w.org\/{rel}","templated":true}]}}