Skip to content

Commit 9d65200

Browse files
authored
Fix language stat calculation (#11692)
* Fix language stat calculation * Group languages and ignore zero-size files * Remove unneeded code
1 parent ea4c139 commit 9d65200

File tree

3 files changed

+29
-41
lines changed

3 files changed

+29
-41
lines changed

models/repo_language_stats.go

Lines changed: 1 addition & 34 deletions
Original file line numberDiff line numberDiff line change
@@ -26,22 +26,6 @@ type LanguageStat struct {
2626
CreatedUnix timeutil.TimeStamp `xorm:"INDEX CREATED"`
2727
}
2828

29-
// specialLanguages defines the list of languages that are excluded from the calculation
30-
// unless they are the only languages present in the repository. Only languages which under
31-
// normal circumstances are not considered to be code should be listed here.
32-
var specialLanguages = map[string]struct{}{
33-
"XML": {},
34-
"JSON": {},
35-
"TOML": {},
36-
"YAML": {},
37-
"INI": {},
38-
"SQL": {},
39-
"SVG": {},
40-
"Text": {},
41-
"Markdown": {},
42-
"other": {},
43-
}
44-
4529
// LanguageStatList defines a list of language statistics
4630
type LanguageStatList []*LanguageStat
4731

@@ -55,27 +39,12 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
5539
langPerc := make(map[string]float32)
5640
var otherPerc float32 = 100
5741
var total int64
58-
// Check that repository has at least one non-special language
59-
var skipSpecial bool
60-
for _, stat := range stats {
61-
if _, ok := specialLanguages[stat.Language]; !ok {
62-
skipSpecial = true
63-
break
64-
}
65-
}
42+
6643
for _, stat := range stats {
67-
// Exclude specific languages from percentage calculation
68-
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
69-
continue
70-
}
7144
total += stat.Size
7245
}
7346
if total > 0 {
7447
for _, stat := range stats {
75-
// Exclude specific languages from percentage calculation
76-
if _, ok := specialLanguages[stat.Language]; ok && skipSpecial {
77-
continue
78-
}
7948
perc := float32(math.Round(float64(stat.Size)/float64(total)*1000) / 10)
8049
if perc <= 0.1 {
8150
continue
@@ -84,8 +53,6 @@ func (stats LanguageStatList) getLanguagePercentages() map[string]float32 {
8453
langPerc[stat.Language] = perc
8554
}
8655
otherPerc = float32(math.Round(float64(otherPerc)*10) / 10)
87-
} else {
88-
otherPerc = 100
8956
}
9057
if otherPerc > 0 {
9158
langPerc["other"] = otherPerc

modules/git/repo_language_stats.go

Lines changed: 27 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -19,6 +19,20 @@ import (
1919

2020
const fileSizeLimit int64 = 16 * 1024 * 1024
2121

22+
// specialLanguages defines the list of languages that are excluded from the calculation
23+
// unless a single one of them is the only language detected in the repository. Only languages which under
24+
// normal circumstances are not considered to be code should be listed here.
25+
var specialLanguages = []string{
26+
"XML",
27+
"JSON",
28+
"TOML",
29+
"YAML",
30+
"INI",
31+
"SVG",
32+
"Text",
33+
"Markdown",
34+
}
35+
2236
// GetLanguageStats calculates language stats for the git repository at the specified commit
2337
func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, error) {
2438
r, err := git.PlainOpen(repo.Path)
@@ -43,7 +57,7 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
4357

4458
sizes := make(map[string]int64)
4559
err = tree.Files().ForEach(func(f *object.File) error {
46-
if enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
60+
if f.Size == 0 || enry.IsVendor(f.Name) || enry.IsDotFile(f.Name) ||
4761
enry.IsDocumentation(f.Name) || enry.IsConfiguration(f.Name) {
4862
return nil
4963
}
@@ -58,7 +72,13 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
5872

5973
language := analyze.GetCodeLanguage(f.Name, content)
6074
if language == enry.OtherLanguage || language == "" {
61-
language = "other"
75+
return nil
76+
}
77+
78+
// replace the language with its language group when it has one, e.g. Pug -> HTML, SCSS -> CSS
79+
group := enry.GetLanguageGroup(language)
80+
if group != "" {
81+
language = group
6282
}
6383

6484
sizes[language] += f.Size
@@ -69,8 +89,11 @@ func (repo *Repository) GetLanguageStats(commitID string) (map[string]int64, err
6989
return nil, err
7090
}
7191

72-
if len(sizes) == 0 {
73-
sizes["other"] = 0
92+
// drop all special languages whenever more than one language was detected; a special language is kept only when it is the sole entry
93+
if len(sizes) > 1 {
94+
for _, language := range specialLanguages {
95+
delete(sizes, language)
96+
}
7497
}
7598

7699
return sizes, nil

modules/indexer/stats/indexer_test.go

Lines changed: 1 addition & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -39,7 +39,5 @@ func TestRepoStatsIndex(t *testing.T) {
3939
assert.Equal(t, "65f1bf27bc3bf70f64657658635e66094edbcb4d", status.CommitSha)
4040
langs, err := repo.GetTopLanguageStats(5)
4141
assert.NoError(t, err)
42-
assert.Len(t, langs, 1)
43-
assert.Equal(t, "other", langs[0].Language)
44-
assert.Equal(t, float32(100), langs[0].Percentage)
42+
assert.Empty(t, langs)
4543
}

0 commit comments

Comments
 (0)