Skip to content

Commit acc709f

Browse files
dbym4820 and claude committed
Add AI-powered RSS structure analysis for journal registration
- Add rss_extraction_config column to journals table for storing AI-generated extraction rules
- Create RssStructureAnalyzer service for analyzing RSS feed structure with AI
- Modify AdminController to call AI analysis when registering new RSS journals
- Update RssFetcherService to apply extraction rules during feed fetching
- Support summary tag parsing with regex patterns for extracting embedded information
- Fall back to SimplePie heuristics if AI extraction rules fail

🤖 Generated with [Claude Code](https://claude.com/claude-code)

Co-Authored-By: Claude Opus 4.5 <[email protected]>
1 parent 77db92e commit acc709f

File tree

6 files changed

+1023
-25
lines changed

6 files changed

+1023
-25
lines changed

app/Http/Controllers/AdminController.php

Lines changed: 40 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@
1010
use App\Models\User;
1111
use App\Services\RssFetcherService;
1212
use App\Services\AiRssGeneratorService;
13+
use App\Services\RssStructureAnalyzer;
1314

1415
class AdminController extends Controller
1516
{
@@ -19,10 +20,17 @@ class AdminController extends Controller
1920
/** @var AiRssGeneratorService */
2021
private $aiRssGenerator;
2122

22-
public function __construct(RssFetcherService $rssFetcher, AiRssGeneratorService $aiRssGenerator)
23-
{
23+
/** @var RssStructureAnalyzer */
24+
private $rssAnalyzer;
25+
26+
/**
 * Store the three injected services on the controller instance.
 *
 * @param RssFetcherService $rssFetcher Assigned to $this->rssFetcher.
 * @param AiRssGeneratorService $aiRssGenerator Assigned to $this->aiRssGenerator.
 * @param RssStructureAnalyzer $rssAnalyzer Assigned to $this->rssAnalyzer (added by this commit).
 */
public function __construct(
27+
RssFetcherService $rssFetcher,
28+
AiRssGeneratorService $aiRssGenerator,
29+
RssStructureAnalyzer $rssAnalyzer
30+
) {
2431
$this->rssFetcher = $rssFetcher;
2532
$this->aiRssGenerator = $aiRssGenerator;
33+
$this->rssAnalyzer = $rssAnalyzer;
2634
}
2735

2836
// ======================================
@@ -148,6 +156,7 @@ public function createJournal(Request $request): JsonResponse
148156
$journal = Journal::create($journalData);
149157

150158
// 初回フェッチを実行
159+
$analysisResult = null;
151160
if ($journal->isAiGenerated()) {
152161
// AI生成の場合:ページ構造を解析してセレクタを保存
153162
$setupResult = $this->aiRssGenerator->setupFeed($journal, $user);
@@ -161,9 +170,37 @@ public function createJournal(Request $request): JsonResponse
161170
$fetchResult = $setupResult;
162171
}
163172
} else {
164-
// 通常RSSの場合
173+
// 通常RSSの場合:AIでRSS構造を解析(APIキーがある場合のみ)
174+
if ($user->hasEffectiveClaudeApiKey() || $user->hasEffectiveOpenaiApiKey()) {
175+
$journal->markAnalysisPending();
176+
$this->rssAnalyzer->setUser($user);
177+
178+
$analysisResult = $this->rssAnalyzer->analyzeRssFeed($journal);
179+
180+
if ($analysisResult['success']) {
181+
$journal->markAnalysisSuccess($analysisResult['config']);
182+
} else {
183+
// 解析失敗でもジャーナル登録は継続(従来のSimplePieでフェッチ)
184+
$journal->markAnalysisFailed($analysisResult['error'] ?? 'Unknown error');
185+
}
186+
}
187+
188+
// フェッチ実行
165189
$fetchResult = $this->rssFetcher->fetchJournal($journal);
166190
$message = '論文誌を追加しました';
191+
$newPapers = $fetchResult['new_papers'] ?? 0;
192+
if ($newPapers > 0) {
193+
$message .= '' . $newPapers . '件の論文を登録)';
194+
}
195+
196+
// 解析結果の情報を追加
197+
if ($analysisResult) {
198+
if ($analysisResult['success']) {
199+
$message .= '.RSS構造をAIで解析しました';
200+
} else {
201+
$message .= '.RSS構造の解析に失敗しました(従来方式でフェッチ)';
202+
}
203+
}
167204
}
168205

169206
// generatedFeedの情報も返す

app/Models/Journal.php

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,10 @@ class Journal extends Model
1818
'name',
1919
'rss_url',
2020
'source_type',
21+
'rss_extraction_config',
22+
'rss_analysis_status',
23+
'rss_analysis_error',
24+
'rss_analyzed_at',
2125
'color',
2226
'is_active',
2327
'last_fetched_at',
@@ -37,6 +41,8 @@ protected static function boot()
3741
protected $casts = [
3842
'is_active' => 'boolean',
3943
'last_fetched_at' => 'datetime',
44+
'rss_extraction_config' => 'array',
45+
'rss_analyzed_at' => 'datetime',
4046
];
4147

4248
public function user(): BelongsTo
@@ -67,6 +73,51 @@ public function isAiGenerated(): bool
6773
return $this->source_type === 'ai_generated';
6874
}
6975

76+
/**
77+
* Check if this journal has usable RSS extraction rules.
*
* Both conditions must hold: rss_extraction_config is non-empty AND
* rss_analysis_status is exactly 'success'. A stored config alone is not
* enough when the status is anything else (e.g. 'pending' or 'error').
*
* @return bool
78+
*/
79+
public function hasExtractionRules(): bool
80+
{
81+
return !empty($this->rss_extraction_config)
82+
&& $this->rss_analysis_status === 'success';
83+
}
84+
85+
/**
86+
* Mark RSS analysis as pending.
*
* Calls update() to set rss_analysis_status to 'pending' and reset
* rss_analysis_error to null. rss_extraction_config and rss_analyzed_at
* are not touched here.
*
* @return void
87+
*/
88+
public function markAnalysisPending(): void
89+
{
90+
$this->update([
91+
'rss_analysis_status' => 'pending',
92+
'rss_analysis_error' => null,
93+
]);
94+
}
95+
96+
/**
97+
* Mark RSS analysis as successful and store the resulting extraction rules.
*
* Calls update() to write $config into rss_extraction_config, set
* rss_analysis_status to 'success', reset rss_analysis_error to null and
* stamp rss_analyzed_at with now().
*
* @param array $config Extraction rules to store in rss_extraction_config.
* @return void
98+
*/
99+
public function markAnalysisSuccess(array $config): void
100+
{
101+
$this->update([
102+
'rss_extraction_config' => $config,
103+
'rss_analysis_status' => 'success',
104+
'rss_analysis_error' => null,
105+
'rss_analyzed_at' => now(),
106+
]);
107+
}
108+
109+
/**
110+
* Mark RSS analysis as failed and record the reason.
*
* Calls update() to set rss_analysis_status to 'error' (note: the stored
* value is 'error', not 'failed'), write $error into rss_analysis_error and
* stamp rss_analyzed_at with now(). rss_extraction_config is left as it was.
*
* @param string $error Failure message to store in rss_analysis_error.
* @return void
111+
*/
112+
public function markAnalysisFailed(string $error): void
113+
{
114+
$this->update([
115+
'rss_analysis_status' => 'error',
116+
'rss_analysis_error' => $error,
117+
'rss_analyzed_at' => now(),
118+
]);
119+
}
120+
70121
public function scopeActive($query)
71122
{
72123
return $query->where('is_active', true);

app/Services/AiSummaryService.php

Lines changed: 31 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -95,19 +95,20 @@ private function getFullTextFetcher(): FullTextFetcherService
9595
* データソースの優先順位に従って要約用コンテンツを準備
9696
*
9797
* 優先順位:
98-
* 1. PDF(ローカルまたはリモート)
99-
* 2. full_text(既に取得済みの本文)
100-
* 3. DOIページから本文を取得
101-
* 4. アブストラクト
102-
* 5. タイトル+利用可能なメタデータ
98+
* 1. ローカルPDF(実際にダウンロード済みのPDF)
99+
* 2. DOIページから本文を取得
100+
* 3. full_text(既に取得済みの本文)
101+
* 4. リモートPDF URL(PDFがリモートにのみある場合)
102+
* 5. アブストラクト
103+
* 6. タイトル+利用可能なメタデータ
103104
*
104105
* @param Paper $paper
105106
* @return array ['source' => string, 'content' => string|null, 'pdf_url' => string|null, 'pdf_available' => bool]
106107
*/
107108
private function prepareContentForSummary(Paper $paper): array
108109
{
109-
// 1. PDF(ローカルまたはリモート)があるかチェック
110-
if ($paper->hasLocalPdf() || $paper->pdf_url) {
110+
// 1. ローカルPDF(実際にダウンロード済み)があるかチェック
111+
if ($paper->hasLocalPdf()) {
111112
return [
112113
'source' => 'pdf',
113114
'content' => null,
@@ -116,17 +117,7 @@ private function prepareContentForSummary(Paper $paper): array
116117
];
117118
}
118119

119-
// 2. 既に取得済みの本文があるかチェック
120-
if ($paper->hasFullText()) {
121-
return [
122-
'source' => 'full_text',
123-
'content' => $paper->full_text,
124-
'pdf_url' => null,
125-
'pdf_available' => false,
126-
];
127-
}
128-
129-
// 3. DOIがあればDOIページから本文取得を試みる
120+
// 2. DOIがあればDOIページから本文取得を試みる(原則DOI先を参照)
130121
if (!empty($paper->doi)) {
131122
Log::info("Attempting to fetch full text from DOI for paper {$paper->id}");
132123

@@ -178,7 +169,27 @@ private function prepareContentForSummary(Paper $paper): array
178169
}
179170
}
180171

181-
// 4. アブストラクトがあればそれを使用
172+
// 3. 既に取得済みの本文があるかチェック
173+
if ($paper->hasFullText()) {
174+
return [
175+
'source' => 'full_text',
176+
'content' => $paper->full_text,
177+
'pdf_url' => null,
178+
'pdf_available' => false,
179+
];
180+
}
181+
182+
// 4. リモートPDF URL(ダウンロード済みでないPDF)がある場合
183+
if ($paper->pdf_url) {
184+
return [
185+
'source' => 'pdf',
186+
'content' => null,
187+
'pdf_url' => $paper->pdf_url,
188+
'pdf_available' => true,
189+
];
190+
}
191+
192+
// 5. アブストラクトがあればそれを使用
182193
if (!empty($paper->abstract)) {
183194
return [
184195
'source' => 'abstract',
@@ -188,7 +199,7 @@ private function prepareContentForSummary(Paper $paper): array
188199
];
189200
}
190201

191-
// 5. 最終手段:タイトルと利用可能なメタデータのみ
202+
// 6. 最終手段:タイトルと利用可能なメタデータのみ
192203
return [
193204
'source' => 'minimal',
194205
'content' => null,

0 commit comments

Comments
 (0)