|
| 1 | +package de.tum.in.www1.hephaestus.gitprovider.commit.gitlab; |
| 2 | + |
| 3 | +import static de.tum.in.www1.hephaestus.core.LoggingUtils.sanitizeForLog; |
| 4 | + |
| 5 | +import de.tum.in.www1.hephaestus.gitprovider.commit.Commit; |
| 6 | +import de.tum.in.www1.hephaestus.gitprovider.commit.CommitAuthorResolver; |
| 7 | +import de.tum.in.www1.hephaestus.gitprovider.commit.CommitFileChange; |
| 8 | +import de.tum.in.www1.hephaestus.gitprovider.commit.CommitRepository; |
| 9 | +import de.tum.in.www1.hephaestus.gitprovider.commit.util.CommitUtils; |
| 10 | +import de.tum.in.www1.hephaestus.gitprovider.common.DataSource; |
| 11 | +import de.tum.in.www1.hephaestus.gitprovider.common.GitProviderType; |
| 12 | +import de.tum.in.www1.hephaestus.gitprovider.common.events.DomainEvent; |
| 13 | +import de.tum.in.www1.hephaestus.gitprovider.common.events.EventContext; |
| 14 | +import de.tum.in.www1.hephaestus.gitprovider.common.events.EventPayload; |
| 15 | +import de.tum.in.www1.hephaestus.gitprovider.common.events.RepositoryRef; |
| 16 | +import de.tum.in.www1.hephaestus.gitprovider.common.gitlab.GitLabTokenService; |
| 17 | +import de.tum.in.www1.hephaestus.gitprovider.git.GitRepositoryManager; |
| 18 | +import de.tum.in.www1.hephaestus.gitprovider.repository.Repository; |
| 19 | +import de.tum.in.www1.hephaestus.gitprovider.sync.SyncResult; |
| 20 | +import java.time.Instant; |
| 21 | +import java.util.List; |
| 22 | +import java.util.UUID; |
| 23 | +import org.slf4j.Logger; |
| 24 | +import org.slf4j.LoggerFactory; |
| 25 | +import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty; |
| 26 | +import org.springframework.context.ApplicationEventPublisher; |
| 27 | +import org.springframework.lang.Nullable; |
| 28 | +import org.springframework.stereotype.Service; |
| 29 | +import org.springframework.transaction.support.TransactionTemplate; |
| 30 | + |
| 31 | +/** |
| 32 | + * Backfills commit history for GitLab repositories via local JGit clones. |
| 33 | + * |
| 34 | + * <p>Mirrors {@link de.tum.in.www1.hephaestus.gitprovider.commit.github.GitHubCommitBackfillService} |
| 35 | + * to provide full diff statistics (additions, deletions, changedFiles) and file change |
| 36 | + * tracking — data that the GitLab REST API commit list endpoint does not provide. |
| 37 | + * |
| 38 | + * <p><b>How it works:</b> |
| 39 | + * <ol> |
| 40 | + * <li>Clone or fetch the repository via {@link GitRepositoryManager}</li> |
| 41 | + * <li>Resolve the HEAD SHA of the default branch</li> |
| 42 | + * <li>Walk new commits since the last known SHA (or full history on first backfill)</li> |
| 43 | + * <li>For each commit: upsert via native SQL, attach file changes, publish events</li> |
| 44 | + * </ol> |
| 45 | + * |
| 46 | + * <p>When {@code hephaestus.git.enabled=false}, {@link #backfillCommits} returns |
| 47 | + * {@link SyncResult#completed(int) SyncResult.completed(0)} so callers can fall |
| 48 | + * back to the REST-based {@link GitLabCommitSyncService}. |
| 49 | + * |
| 50 | + * @see GitLabCommitSyncService |
| 51 | + */ |
| 52 | +@Service |
| 53 | +@ConditionalOnProperty(prefix = "hephaestus.gitlab", name = "enabled", havingValue = "true") |
| 54 | +public class GitLabCommitBackfillService { |
| 55 | + |
| 56 | + private static final Logger log = LoggerFactory.getLogger(GitLabCommitBackfillService.class); |
| 57 | + private static final int MAX_COMMITS_PER_CYCLE = 5000; |
| 58 | + |
| 59 | + private final GitRepositoryManager gitRepositoryManager; |
| 60 | + private final GitLabTokenService tokenService; |
| 61 | + private final CommitRepository commitRepository; |
| 62 | + private final CommitAuthorResolver authorResolver; |
| 63 | + private final ApplicationEventPublisher eventPublisher; |
| 64 | + private final TransactionTemplate transactionTemplate; |
| 65 | + |
| 66 | + public GitLabCommitBackfillService( |
| 67 | + GitRepositoryManager gitRepositoryManager, |
| 68 | + GitLabTokenService tokenService, |
| 69 | + CommitRepository commitRepository, |
| 70 | + CommitAuthorResolver authorResolver, |
| 71 | + ApplicationEventPublisher eventPublisher, |
| 72 | + TransactionTemplate transactionTemplate |
| 73 | + ) { |
| 74 | + this.gitRepositoryManager = gitRepositoryManager; |
| 75 | + this.tokenService = tokenService; |
| 76 | + this.commitRepository = commitRepository; |
| 77 | + this.authorResolver = authorResolver; |
| 78 | + this.eventPublisher = eventPublisher; |
| 79 | + this.transactionTemplate = transactionTemplate; |
| 80 | + } |
| 81 | + |
| 82 | + /** |
| 83 | + * Backfills commits for a GitLab repository from its local git clone. |
| 84 | + * |
| 85 | + * <p>Idempotent: commits already in the database are skipped via |
| 86 | + * {@code existsByShaAndRepositoryId} fast-path. Returns immediately with |
| 87 | + * count 0 when local git is disabled. |
| 88 | + * |
| 89 | + * @param scopeId the workspace scope ID (for token resolution) |
| 90 | + * @param repository the repository entity |
| 91 | + * @return sync result with count of new commits persisted |
| 92 | + */ |
| 93 | + public SyncResult backfillCommits(Long scopeId, Repository repository) { |
| 94 | + if (!gitRepositoryManager.isEnabled()) { |
| 95 | + return SyncResult.completed(0); |
| 96 | + } |
| 97 | + |
| 98 | + Long repoId = repository.getId(); |
| 99 | + String repoName = sanitizeForLog(repository.getNameWithOwner()); |
| 100 | + String defaultBranch = repository.getDefaultBranch(); |
| 101 | + |
| 102 | + if (defaultBranch == null || defaultBranch.isBlank()) { |
| 103 | + log.debug("Skipped commit backfill: reason=noDefaultBranch, repoId={}, repoName={}", repoId, repoName); |
| 104 | + return SyncResult.completed(0); |
| 105 | + } |
| 106 | + |
| 107 | + try { |
| 108 | + // Phase 1: Clone/fetch (outside transaction — may be slow for initial clones) |
| 109 | + String serverUrl = tokenService.resolveServerUrl(scopeId); |
| 110 | + String token = tokenService.getAccessToken(scopeId); |
| 111 | + String cloneUrl = serverUrl + "/" + repository.getNameWithOwner() + ".git"; |
| 112 | + gitRepositoryManager.ensureRepository(repoId, cloneUrl, token); |
| 113 | + |
| 114 | + // Phase 2: Resolve HEAD of default branch |
| 115 | + String headSha = gitRepositoryManager.resolveDefaultBranchHead(repoId, defaultBranch); |
| 116 | + if (headSha == null) { |
| 117 | + log.warn( |
| 118 | + "Skipped commit backfill: reason=cannotResolveHead, repoId={}, repoName={}, branch={}", |
| 119 | + repoId, |
| 120 | + repoName, |
| 121 | + defaultBranch |
| 122 | + ); |
| 123 | + return SyncResult.completed(0); |
| 124 | + } |
| 125 | + |
| 126 | + // Phase 3: Determine walk range (incremental vs full) |
| 127 | + String fromSha = findLatestKnownSha(repoId); |
| 128 | + if (fromSha != null && fromSha.equals(headSha)) { |
| 129 | + log.debug( |
| 130 | + "Skipped commit backfill: reason=alreadyUpToDate, repoId={}, repoName={}, headSha={}", |
| 131 | + repoId, |
| 132 | + repoName, |
| 133 | + abbreviateSha(headSha) |
| 134 | + ); |
| 135 | + return SyncResult.completed(0); |
| 136 | + } |
| 137 | + |
| 138 | + // Phase 4: Walk commits |
| 139 | + List<GitRepositoryManager.CommitInfo> commitInfos = gitRepositoryManager.walkCommits( |
| 140 | + repoId, |
| 141 | + fromSha, |
| 142 | + headSha |
| 143 | + ); |
| 144 | + |
| 145 | + if (commitInfos.isEmpty()) { |
| 146 | + return SyncResult.completed(0); |
| 147 | + } |
| 148 | + |
| 149 | + // Phase 5: Process commits (with batch limit) |
| 150 | + int total = commitInfos.size(); |
| 151 | + boolean truncated = total > MAX_COMMITS_PER_CYCLE; |
| 152 | + List<GitRepositoryManager.CommitInfo> batch = truncated |
| 153 | + ? commitInfos.subList(0, MAX_COMMITS_PER_CYCLE) |
| 154 | + : commitInfos; |
| 155 | + |
| 156 | + int processed = 0; |
| 157 | + for (GitRepositoryManager.CommitInfo info : batch) { |
| 158 | + if (processCommitInfo(info, repository, scopeId, serverUrl)) { |
| 159 | + processed++; |
| 160 | + } |
| 161 | + } |
| 162 | + |
| 163 | + if (truncated) { |
| 164 | + log.info( |
| 165 | + "Commit backfill batch limit reached: repoId={}, repoName={}, processed={}, total={}, remaining={}", |
| 166 | + repoId, |
| 167 | + repoName, |
| 168 | + processed, |
| 169 | + total, |
| 170 | + total - MAX_COMMITS_PER_CYCLE |
| 171 | + ); |
| 172 | + } else if (processed > 0) { |
| 173 | + log.info( |
| 174 | + "Completed commit backfill: repoId={}, repoName={}, newCommits={}, totalWalked={}, mode={}", |
| 175 | + repoId, |
| 176 | + repoName, |
| 177 | + processed, |
| 178 | + total, |
| 179 | + fromSha != null ? "incremental" : "full" |
| 180 | + ); |
| 181 | + } |
| 182 | + |
| 183 | + return SyncResult.completed(processed); |
| 184 | + } catch (GitRepositoryManager.GitOperationException e) { |
| 185 | + log.error( |
| 186 | + "Commit backfill failed (git operation): repoId={}, repoName={}, error={}", |
| 187 | + repoId, |
| 188 | + repoName, |
| 189 | + e.getMessage() |
| 190 | + ); |
| 191 | + return SyncResult.abortedError(0); |
| 192 | + } catch (Exception e) { |
| 193 | + log.error("Commit backfill failed: repoId={}, repoName={}, error={}", repoId, repoName, e.getMessage(), e); |
| 194 | + return SyncResult.abortedError(0); |
| 195 | + } |
| 196 | + } |
| 197 | + |
| 198 | + @Nullable |
| 199 | + private String findLatestKnownSha(Long repositoryId) { |
| 200 | + return commitRepository.findLatestByRepositoryId(repositoryId).map(Commit::getSha).orElse(null); |
| 201 | + } |
| 202 | + |
| 203 | + private boolean processCommitInfo( |
| 204 | + GitRepositoryManager.CommitInfo info, |
| 205 | + Repository repository, |
| 206 | + Long scopeId, |
| 207 | + String serverUrl |
| 208 | + ) { |
| 209 | + Boolean result = transactionTemplate.execute(status -> { |
| 210 | + if (commitRepository.existsByShaAndRepositoryId(info.sha(), repository.getId())) { |
| 211 | + return false; |
| 212 | + } |
| 213 | + |
| 214 | + Long providerId = repository.getProvider() != null ? repository.getProvider().getId() : null; |
| 215 | + Long authorId = authorResolver.resolveByEmail(info.authorEmail(), providerId); |
| 216 | + Long committerId = authorResolver.resolveByEmail(info.committerEmail(), providerId); |
| 217 | + |
| 218 | + String message = info.message() != null ? info.message() : ""; |
| 219 | + String htmlUrl = CommitUtils.buildGitLabCommitUrl(serverUrl, repository.getNameWithOwner(), info.sha()); |
| 220 | + |
| 221 | + commitRepository.upsertCommit( |
| 222 | + info.sha(), |
| 223 | + message, |
| 224 | + info.messageBody(), |
| 225 | + htmlUrl, |
| 226 | + info.authoredAt(), |
| 227 | + info.committedAt(), |
| 228 | + info.additions(), |
| 229 | + info.deletions(), |
| 230 | + info.changedFiles(), |
| 231 | + Instant.now(), |
| 232 | + repository.getId(), |
| 233 | + authorId, |
| 234 | + committerId, |
| 235 | + info.authorEmail(), |
| 236 | + info.committerEmail() |
| 237 | + ); |
| 238 | + |
| 239 | + if (!info.fileChanges().isEmpty()) { |
| 240 | + Commit commit = commitRepository.findByShaAndRepositoryId(info.sha(), repository.getId()).orElse(null); |
| 241 | + if (commit != null) { |
| 242 | + for (GitRepositoryManager.FileChange fc : info.fileChanges()) { |
| 243 | + CommitFileChange fileChange = new CommitFileChange(); |
| 244 | + fileChange.setFilename(fc.filename()); |
| 245 | + fileChange.setChangeType(CommitFileChange.fromGitChangeType(fc.changeType())); |
| 246 | + fileChange.setAdditions(fc.additions()); |
| 247 | + fileChange.setDeletions(fc.deletions()); |
| 248 | + fileChange.setChanges(fc.changes()); |
| 249 | + fileChange.setPreviousFilename(fc.previousFilename()); |
| 250 | + commit.addFileChange(fileChange); |
| 251 | + } |
| 252 | + commitRepository.save(commit); |
| 253 | + } |
| 254 | + } |
| 255 | + |
| 256 | + publishCommitCreated(info.sha(), repository, scopeId); |
| 257 | + return true; |
| 258 | + }); |
| 259 | + return Boolean.TRUE.equals(result); |
| 260 | + } |
| 261 | + |
| 262 | + private void publishCommitCreated(String sha, Repository repository, Long scopeId) { |
| 263 | + Commit commit = commitRepository.findByShaAndRepositoryId(sha, repository.getId()).orElse(null); |
| 264 | + if (commit == null) { |
| 265 | + return; |
| 266 | + } |
| 267 | + |
| 268 | + EventPayload.CommitData commitData = EventPayload.CommitData.from(commit); |
| 269 | + EventContext context = new EventContext( |
| 270 | + UUID.randomUUID(), |
| 271 | + Instant.now(), |
| 272 | + scopeId, |
| 273 | + RepositoryRef.from(repository), |
| 274 | + DataSource.GRAPHQL_SYNC, |
| 275 | + null, |
| 276 | + UUID.randomUUID().toString(), |
| 277 | + GitProviderType.GITLAB |
| 278 | + ); |
| 279 | + |
| 280 | + eventPublisher.publishEvent(new DomainEvent.CommitCreated(commitData, context)); |
| 281 | + } |
| 282 | + |
| 283 | + private static String abbreviateSha(String sha) { |
| 284 | + return sha.length() > 7 ? sha.substring(0, 7) : sha; |
| 285 | + } |
| 286 | +} |
0 commit comments