Skip to content

Commit 19e9787

Browse files
feat(workspace): auto-discover and monitor GitLab repositories
When creating a GitLab PAT workspace, all repositories in the selected group are now automatically discovered and added to monitoring — matching the existing GitHub App installation behavior.

Key changes:
- Extract GitLabWorkspaceInitializationService from WorkspaceActivationService to encapsulate the full GitLab initialization lifecycle (webhook setup, project discovery, org linking, monitor creation, full data sync)
- Add GitLabCommitBackfillService for JGit-based commit history with full diff stats (additions, deletions, changedFiles) and file change tracking
- Fix subgroup repo inclusion: replace org-login-based query with findAllByWorkspaceMonitors (joins through RepositoryToMonitor)
- Fix LazyInitializationException: eagerly fetch provider in repo query
- Add rate limit awareness and cooldown for slowly-changing entities
- Wire initialization into workspace creation flow (async, fire-and-forget)

Fixes #961

Co-Authored-By: Claude Opus 4.6 <noreply@anthropic.com>
1 parent 9803d52 commit 19e9787

File tree

12 files changed

+1675
-472
lines changed

12 files changed

+1675
-472
lines changed
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,286 @@
1+
package de.tum.in.www1.hephaestus.gitprovider.commit.gitlab;
2+
3+
import static de.tum.in.www1.hephaestus.core.LoggingUtils.sanitizeForLog;
4+
5+
import de.tum.in.www1.hephaestus.gitprovider.commit.Commit;
6+
import de.tum.in.www1.hephaestus.gitprovider.commit.CommitAuthorResolver;
7+
import de.tum.in.www1.hephaestus.gitprovider.commit.CommitFileChange;
8+
import de.tum.in.www1.hephaestus.gitprovider.commit.CommitRepository;
9+
import de.tum.in.www1.hephaestus.gitprovider.commit.util.CommitUtils;
10+
import de.tum.in.www1.hephaestus.gitprovider.common.DataSource;
11+
import de.tum.in.www1.hephaestus.gitprovider.common.GitProviderType;
12+
import de.tum.in.www1.hephaestus.gitprovider.common.events.DomainEvent;
13+
import de.tum.in.www1.hephaestus.gitprovider.common.events.EventContext;
14+
import de.tum.in.www1.hephaestus.gitprovider.common.events.EventPayload;
15+
import de.tum.in.www1.hephaestus.gitprovider.common.events.RepositoryRef;
16+
import de.tum.in.www1.hephaestus.gitprovider.common.gitlab.GitLabTokenService;
17+
import de.tum.in.www1.hephaestus.gitprovider.git.GitRepositoryManager;
18+
import de.tum.in.www1.hephaestus.gitprovider.repository.Repository;
19+
import de.tum.in.www1.hephaestus.gitprovider.sync.SyncResult;
20+
import java.time.Instant;
21+
import java.util.List;
22+
import java.util.UUID;
23+
import org.slf4j.Logger;
24+
import org.slf4j.LoggerFactory;
25+
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
26+
import org.springframework.context.ApplicationEventPublisher;
27+
import org.springframework.lang.Nullable;
28+
import org.springframework.stereotype.Service;
29+
import org.springframework.transaction.support.TransactionTemplate;
30+
31+
/**
32+
* Backfills commit history for GitLab repositories via local JGit clones.
33+
*
34+
* <p>Mirrors {@link de.tum.in.www1.hephaestus.gitprovider.commit.github.GitHubCommitBackfillService}
35+
* to provide full diff statistics (additions, deletions, changedFiles) and file change
36+
* tracking — data that the GitLab REST API commit list endpoint does not provide.
37+
*
38+
* <p><b>How it works:</b>
39+
* <ol>
40+
* <li>Clone or fetch the repository via {@link GitRepositoryManager}</li>
41+
* <li>Resolve the HEAD SHA of the default branch</li>
42+
* <li>Walk new commits since the last known SHA (or full history on first backfill)</li>
43+
* <li>For each commit: upsert via native SQL, attach file changes, publish events</li>
44+
* </ol>
45+
*
46+
* <p>When {@code hephaestus.git.enabled=false}, {@link #backfillCommits} returns
47+
* {@link SyncResult#completed(int) SyncResult.completed(0)} so callers can fall
48+
* back to the REST-based {@link GitLabCommitSyncService}.
49+
*
50+
* @see GitLabCommitSyncService
51+
*/
52+
@Service
53+
@ConditionalOnProperty(prefix = "hephaestus.gitlab", name = "enabled", havingValue = "true")
54+
public class GitLabCommitBackfillService {
55+
56+
private static final Logger log = LoggerFactory.getLogger(GitLabCommitBackfillService.class);
57+
private static final int MAX_COMMITS_PER_CYCLE = 5000;
58+
59+
private final GitRepositoryManager gitRepositoryManager;
60+
private final GitLabTokenService tokenService;
61+
private final CommitRepository commitRepository;
62+
private final CommitAuthorResolver authorResolver;
63+
private final ApplicationEventPublisher eventPublisher;
64+
private final TransactionTemplate transactionTemplate;
65+
66+
public GitLabCommitBackfillService(
67+
GitRepositoryManager gitRepositoryManager,
68+
GitLabTokenService tokenService,
69+
CommitRepository commitRepository,
70+
CommitAuthorResolver authorResolver,
71+
ApplicationEventPublisher eventPublisher,
72+
TransactionTemplate transactionTemplate
73+
) {
74+
this.gitRepositoryManager = gitRepositoryManager;
75+
this.tokenService = tokenService;
76+
this.commitRepository = commitRepository;
77+
this.authorResolver = authorResolver;
78+
this.eventPublisher = eventPublisher;
79+
this.transactionTemplate = transactionTemplate;
80+
}
81+
82+
/**
83+
* Backfills commits for a GitLab repository from its local git clone.
84+
*
85+
* <p>Idempotent: commits already in the database are skipped via
86+
* {@code existsByShaAndRepositoryId} fast-path. Returns immediately with
87+
* count 0 when local git is disabled.
88+
*
89+
* @param scopeId the workspace scope ID (for token resolution)
90+
* @param repository the repository entity
91+
* @return sync result with count of new commits persisted
92+
*/
93+
public SyncResult backfillCommits(Long scopeId, Repository repository) {
94+
if (!gitRepositoryManager.isEnabled()) {
95+
return SyncResult.completed(0);
96+
}
97+
98+
Long repoId = repository.getId();
99+
String repoName = sanitizeForLog(repository.getNameWithOwner());
100+
String defaultBranch = repository.getDefaultBranch();
101+
102+
if (defaultBranch == null || defaultBranch.isBlank()) {
103+
log.debug("Skipped commit backfill: reason=noDefaultBranch, repoId={}, repoName={}", repoId, repoName);
104+
return SyncResult.completed(0);
105+
}
106+
107+
try {
108+
// Phase 1: Clone/fetch (outside transaction — may be slow for initial clones)
109+
String serverUrl = tokenService.resolveServerUrl(scopeId);
110+
String token = tokenService.getAccessToken(scopeId);
111+
String cloneUrl = serverUrl + "/" + repository.getNameWithOwner() + ".git";
112+
gitRepositoryManager.ensureRepository(repoId, cloneUrl, token);
113+
114+
// Phase 2: Resolve HEAD of default branch
115+
String headSha = gitRepositoryManager.resolveDefaultBranchHead(repoId, defaultBranch);
116+
if (headSha == null) {
117+
log.warn(
118+
"Skipped commit backfill: reason=cannotResolveHead, repoId={}, repoName={}, branch={}",
119+
repoId,
120+
repoName,
121+
defaultBranch
122+
);
123+
return SyncResult.completed(0);
124+
}
125+
126+
// Phase 3: Determine walk range (incremental vs full)
127+
String fromSha = findLatestKnownSha(repoId);
128+
if (fromSha != null && fromSha.equals(headSha)) {
129+
log.debug(
130+
"Skipped commit backfill: reason=alreadyUpToDate, repoId={}, repoName={}, headSha={}",
131+
repoId,
132+
repoName,
133+
abbreviateSha(headSha)
134+
);
135+
return SyncResult.completed(0);
136+
}
137+
138+
// Phase 4: Walk commits
139+
List<GitRepositoryManager.CommitInfo> commitInfos = gitRepositoryManager.walkCommits(
140+
repoId,
141+
fromSha,
142+
headSha
143+
);
144+
145+
if (commitInfos.isEmpty()) {
146+
return SyncResult.completed(0);
147+
}
148+
149+
// Phase 5: Process commits (with batch limit)
150+
int total = commitInfos.size();
151+
boolean truncated = total > MAX_COMMITS_PER_CYCLE;
152+
List<GitRepositoryManager.CommitInfo> batch = truncated
153+
? commitInfos.subList(0, MAX_COMMITS_PER_CYCLE)
154+
: commitInfos;
155+
156+
int processed = 0;
157+
for (GitRepositoryManager.CommitInfo info : batch) {
158+
if (processCommitInfo(info, repository, scopeId, serverUrl)) {
159+
processed++;
160+
}
161+
}
162+
163+
if (truncated) {
164+
log.info(
165+
"Commit backfill batch limit reached: repoId={}, repoName={}, processed={}, total={}, remaining={}",
166+
repoId,
167+
repoName,
168+
processed,
169+
total,
170+
total - MAX_COMMITS_PER_CYCLE
171+
);
172+
} else if (processed > 0) {
173+
log.info(
174+
"Completed commit backfill: repoId={}, repoName={}, newCommits={}, totalWalked={}, mode={}",
175+
repoId,
176+
repoName,
177+
processed,
178+
total,
179+
fromSha != null ? "incremental" : "full"
180+
);
181+
}
182+
183+
return SyncResult.completed(processed);
184+
} catch (GitRepositoryManager.GitOperationException e) {
185+
log.error(
186+
"Commit backfill failed (git operation): repoId={}, repoName={}, error={}",
187+
repoId,
188+
repoName,
189+
e.getMessage()
190+
);
191+
return SyncResult.abortedError(0);
192+
} catch (Exception e) {
193+
log.error("Commit backfill failed: repoId={}, repoName={}, error={}", repoId, repoName, e.getMessage(), e);
194+
return SyncResult.abortedError(0);
195+
}
196+
}
197+
198+
@Nullable
199+
private String findLatestKnownSha(Long repositoryId) {
200+
return commitRepository.findLatestByRepositoryId(repositoryId).map(Commit::getSha).orElse(null);
201+
}
202+
203+
private boolean processCommitInfo(
204+
GitRepositoryManager.CommitInfo info,
205+
Repository repository,
206+
Long scopeId,
207+
String serverUrl
208+
) {
209+
Boolean result = transactionTemplate.execute(status -> {
210+
if (commitRepository.existsByShaAndRepositoryId(info.sha(), repository.getId())) {
211+
return false;
212+
}
213+
214+
Long providerId = repository.getProvider() != null ? repository.getProvider().getId() : null;
215+
Long authorId = authorResolver.resolveByEmail(info.authorEmail(), providerId);
216+
Long committerId = authorResolver.resolveByEmail(info.committerEmail(), providerId);
217+
218+
String message = info.message() != null ? info.message() : "";
219+
String htmlUrl = CommitUtils.buildGitLabCommitUrl(serverUrl, repository.getNameWithOwner(), info.sha());
220+
221+
commitRepository.upsertCommit(
222+
info.sha(),
223+
message,
224+
info.messageBody(),
225+
htmlUrl,
226+
info.authoredAt(),
227+
info.committedAt(),
228+
info.additions(),
229+
info.deletions(),
230+
info.changedFiles(),
231+
Instant.now(),
232+
repository.getId(),
233+
authorId,
234+
committerId,
235+
info.authorEmail(),
236+
info.committerEmail()
237+
);
238+
239+
if (!info.fileChanges().isEmpty()) {
240+
Commit commit = commitRepository.findByShaAndRepositoryId(info.sha(), repository.getId()).orElse(null);
241+
if (commit != null) {
242+
for (GitRepositoryManager.FileChange fc : info.fileChanges()) {
243+
CommitFileChange fileChange = new CommitFileChange();
244+
fileChange.setFilename(fc.filename());
245+
fileChange.setChangeType(CommitFileChange.fromGitChangeType(fc.changeType()));
246+
fileChange.setAdditions(fc.additions());
247+
fileChange.setDeletions(fc.deletions());
248+
fileChange.setChanges(fc.changes());
249+
fileChange.setPreviousFilename(fc.previousFilename());
250+
commit.addFileChange(fileChange);
251+
}
252+
commitRepository.save(commit);
253+
}
254+
}
255+
256+
publishCommitCreated(info.sha(), repository, scopeId);
257+
return true;
258+
});
259+
return Boolean.TRUE.equals(result);
260+
}
261+
262+
private void publishCommitCreated(String sha, Repository repository, Long scopeId) {
263+
Commit commit = commitRepository.findByShaAndRepositoryId(sha, repository.getId()).orElse(null);
264+
if (commit == null) {
265+
return;
266+
}
267+
268+
EventPayload.CommitData commitData = EventPayload.CommitData.from(commit);
269+
EventContext context = new EventContext(
270+
UUID.randomUUID(),
271+
Instant.now(),
272+
scopeId,
273+
RepositoryRef.from(repository),
274+
DataSource.GRAPHQL_SYNC,
275+
null,
276+
UUID.randomUUID().toString(),
277+
GitProviderType.GITLAB
278+
);
279+
280+
eventPublisher.publishEvent(new DomainEvent.CommitCreated(commitData, context));
281+
}
282+
283+
private static String abbreviateSha(String sha) {
284+
return sha.length() > 7 ? sha.substring(0, 7) : sha;
285+
}
286+
}

server/application-server/src/main/java/de/tum/in/www1/hephaestus/gitprovider/common/gitlab/GitLabSyncServiceHolder.java

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,5 +1,6 @@
11
package de.tum.in.www1.hephaestus.gitprovider.common.gitlab;
22

3+
import de.tum.in.www1.hephaestus.gitprovider.commit.gitlab.GitLabCommitBackfillService;
34
import de.tum.in.www1.hephaestus.gitprovider.commit.gitlab.GitLabCommitSyncService;
45
import de.tum.in.www1.hephaestus.gitprovider.issue.gitlab.GitLabIssueSyncService;
56
import de.tum.in.www1.hephaestus.gitprovider.issuedependency.gitlab.GitLabIssueDependencySyncService;
@@ -36,6 +37,7 @@ public class GitLabSyncServiceHolder {
3637
private final GitLabCollaboratorSyncService collaboratorSyncService;
3738
private final GitLabIssueTypeSyncService issueTypeSyncService;
3839
private final GitLabCommitSyncService commitSyncService;
40+
private final GitLabCommitBackfillService commitBackfillService;
3941
private final GitLabSubIssueSyncService subIssueSyncService;
4042
private final GitLabIssueDependencySyncService issueDependencySyncService;
4143

@@ -50,6 +52,7 @@ public GitLabSyncServiceHolder(
5052
@Nullable GitLabCollaboratorSyncService collaboratorSyncService,
5153
@Nullable GitLabIssueTypeSyncService issueTypeSyncService,
5254
@Nullable GitLabCommitSyncService commitSyncService,
55+
@Nullable GitLabCommitBackfillService commitBackfillService,
5356
@Nullable GitLabSubIssueSyncService subIssueSyncService,
5457
@Nullable GitLabIssueDependencySyncService issueDependencySyncService
5558
) {
@@ -63,6 +66,7 @@ public GitLabSyncServiceHolder(
6366
this.collaboratorSyncService = collaboratorSyncService;
6467
this.issueTypeSyncService = issueTypeSyncService;
6568
this.commitSyncService = commitSyncService;
69+
this.commitBackfillService = commitBackfillService;
6670
this.subIssueSyncService = subIssueSyncService;
6771
this.issueDependencySyncService = issueDependencySyncService;
6872
}
@@ -117,6 +121,11 @@ public GitLabCommitSyncService getCommitSyncService() {
117121
return commitSyncService;
118122
}
119123

124+
/**
 * Returns the JGit-based GitLab commit backfill service held by this holder.
 *
 * @return the service passed to the constructor, or {@code null} if none was supplied
 *         (the constructor parameter is {@code @Nullable})
 */
@Nullable
125+
public GitLabCommitBackfillService getCommitBackfillService() {
126+
return commitBackfillService;
127+
}
128+
120129
@Nullable
121130
public GitLabSubIssueSyncService getSubIssueSyncService() {
122131
return subIssueSyncService;

server/application-server/src/main/java/de/tum/in/www1/hephaestus/gitprovider/repository/RepositoryRepository.java

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -94,6 +94,21 @@ public interface RepositoryRepository extends JpaRepository<Repository, Long> {
9494
*/
9595
List<Repository> findAllByOrganization_LoginIgnoreCaseAndProviderId(String login, Long providerId);
9696

97+
/**
98+
* Finds all repositories whose {@code nameWithOwner} matches one of the workspace's
99+
* {@code RepositoryToMonitor} entries. Matching is done with an IN-subquery on the full
100+
* {@code nameWithOwner}, ensuring subgroup repos are included (not just top-level group repos).
*
* <p>The {@code provider} association is loaded in the same query via {@code LEFT JOIN FETCH}.
*
* <p>NOTE(review): the query has no provider predicate, so repositories from different
* providers that share a {@code nameWithOwner} would all be returned — confirm this is intended.
*
* @param workspaceId ID of the workspace whose monitor entries are matched
* @return the matching repositories; presumably empty when the workspace has no monitors
101+
*/
102+
@Query(
103+
"""
104+
SELECT r FROM Repository r LEFT JOIN FETCH r.provider
105+
WHERE r.nameWithOwner IN (
106+
SELECT m.nameWithOwner FROM RepositoryToMonitor m WHERE m.workspace.id = :workspaceId
107+
)
108+
"""
109+
)
110+
List<Repository> findAllByWorkspaceMonitors(@Param("workspaceId") Long workspaceId);
111+
97112
@Transactional
98113
@Modifying
99114
@Query(

0 commit comments

Comments
 (0)