Skip to content

Commit f26510e

Browse files
committed
test case: sitemap validator - added
1 parent f479576 commit f26510e

File tree

9 files changed

+245
-144
lines changed

9 files changed

+245
-144
lines changed

package.json

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -83,10 +83,12 @@
8383
"babel-jest": "29.7.0",
8484
"commander": "^12.1.0",
8585
"eslint": "^9.15.0",
86+
"fast-xml-parser": "^4.5.0",
8687
"jest": "29.7.0",
8788
"ncp": "^2.0.0",
8889
"rimraf": "6.0.1",
8990
"ts-node": "10.9.2",
90-
"typescript": "5.7.2"
91+
"typescript": "5.7.2",
92+
"xsd-schema-validator": "^0.10.0"
9193
}
9294
}

test/main.test.ts

Lines changed: 7 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -1,31 +1,11 @@
1-
import { readFileSync } from "fs";
2-
import { join } from "path";
3-
import { validateXML } from "xsd-schema-validator";
4-
import config from "../configLoader";
5-
import { makeSitemap } from "../lib/utils";
1+
import { Hawk } from "../lib/core";
2+
import validateSitemap from "./utils/validate-sitemap";
63

7-
async function _validateSitemap(): Promise<boolean> {
8-
//Generate site map
9-
await makeSitemap(true, [], [], true);
10-
11-
/* Loading sitemap.xml */
12-
const sitemapPath: string = config.sitemapPath;
13-
const sitemapXML: string = readFileSync(sitemapPath, {
14-
encoding: "utf8",
15-
});
16-
17-
const sitemapSchemaFile: string = join(__dirname, "sitemap-schema.xsd");
18-
19-
/* Validating */
20-
try {
21-
const result = await validateXML(sitemapXML, sitemapSchemaFile);
22-
return result.valid;
23-
} catch (err) {
24-
console.log(err);
25-
return false;
26-
}
27-
}
4+
const hawkInstance = new Hawk();
5+
const testSampleRootPath = "./test/test-sample";
286

297
test("Sitemap.xml validation", async () => {
30-
expect(await _validateSitemap()).toBe(true);
8+
expect(await validateSitemap(testSampleRootPath, hawkInstance)).toBe(
9+
true,
10+
);
3111
});
File renamed without changes.
File renamed without changes.
File renamed without changes.
Lines changed: 116 additions & 116 deletions
Original file line numberDiff line numberDiff line change
@@ -1,116 +1,116 @@
1-
<?xml version="1.0" encoding="UTF-8"?>
2-
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
3-
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
4-
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
5-
elementFormDefault="qualified">
6-
<xsd:annotation>
7-
<xsd:documentation>
8-
XML Schema for Sitemap files.
9-
Last Modifed 2008-03-26
10-
</xsd:documentation>
11-
</xsd:annotation>
12-
13-
<xsd:element name="urlset">
14-
<xsd:annotation>
15-
<xsd:documentation>
16-
Container for a set of up to 50,000 document elements.
17-
This is the root element of the XML file.
18-
</xsd:documentation>
19-
</xsd:annotation>
20-
<xsd:complexType>
21-
<xsd:sequence>
22-
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
23-
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
24-
</xsd:sequence>
25-
</xsd:complexType>
26-
</xsd:element>
27-
28-
<xsd:complexType name="tUrl">
29-
<xsd:annotation>
30-
<xsd:documentation>
31-
Container for the data needed to describe a document to crawl.
32-
</xsd:documentation>
33-
</xsd:annotation>
34-
<xsd:sequence>
35-
<xsd:element name="loc" type="tLoc"/>
36-
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
37-
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
38-
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
39-
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
40-
</xsd:sequence>
41-
</xsd:complexType>
42-
43-
<xsd:simpleType name="tLoc">
44-
<xsd:annotation>
45-
<xsd:documentation>
46-
REQUIRED: The location URI of a document.
47-
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
48-
</xsd:documentation>
49-
</xsd:annotation>
50-
<xsd:restriction base="xsd:anyURI">
51-
<xsd:minLength value="12"/>
52-
<xsd:maxLength value="2048"/>
53-
</xsd:restriction>
54-
</xsd:simpleType>
55-
56-
<xsd:simpleType name="tLastmod">
57-
<xsd:annotation>
58-
<xsd:documentation>
59-
OPTIONAL: The date the document was last modified. The date must conform
60-
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
61-
Example: 2005-05-10
62-
Lastmod may also contain a timestamp.
63-
Example: 2005-05-10T17:33:30+08:00
64-
</xsd:documentation>
65-
</xsd:annotation>
66-
<xsd:union>
67-
<xsd:simpleType>
68-
<xsd:restriction base="xsd:date"/>
69-
</xsd:simpleType>
70-
<xsd:simpleType>
71-
<xsd:restriction base="xsd:dateTime"/>
72-
</xsd:simpleType>
73-
</xsd:union>
74-
</xsd:simpleType>
75-
76-
<xsd:simpleType name="tChangeFreq">
77-
<xsd:annotation>
78-
<xsd:documentation>
79-
OPTIONAL: Indicates how frequently the content at a particular URL is
80-
likely to change. The value "always" should be used to describe
81-
documents that change each time they are accessed. The value "never"
82-
should be used to describe archived URLs. Please note that web
83-
crawlers may not necessarily crawl pages marked "always" more often.
84-
Consider this element as a friendly suggestion and not a command.
85-
</xsd:documentation>
86-
</xsd:annotation>
87-
<xsd:restriction base="xsd:string">
88-
<xsd:enumeration value="always"/>
89-
<xsd:enumeration value="hourly"/>
90-
<xsd:enumeration value="daily"/>
91-
<xsd:enumeration value="weekly"/>
92-
<xsd:enumeration value="monthly"/>
93-
<xsd:enumeration value="yearly"/>
94-
<xsd:enumeration value="never"/>
95-
</xsd:restriction>
96-
</xsd:simpleType>
97-
98-
<xsd:simpleType name="tPriority">
99-
<xsd:annotation>
100-
<xsd:documentation>
101-
OPTIONAL: The priority of a particular URL relative to other pages
102-
on the same site. The value for this element is a number between
103-
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
104-
The default priority of a page is 0.5. Priority is used to select
105-
between pages on your site. Setting a priority of 1.0 for all URLs
106-
will not help you, as the relative priority of pages on your site
107-
is what will be considered.
108-
</xsd:documentation>
109-
</xsd:annotation>
110-
<xsd:restriction base="xsd:decimal">
111-
<xsd:minInclusive value="0.0"/>
112-
<xsd:maxInclusive value="1.0"/>
113-
</xsd:restriction>
114-
</xsd:simpleType>
115-
116-
</xsd:schema>
1+
<?xml version="1.0" encoding="UTF-8"?>
2+
<xsd:schema xmlns:xsd="http://www.w3.org/2001/XMLSchema"
3+
targetNamespace="http://www.sitemaps.org/schemas/sitemap/0.9"
4+
xmlns="http://www.sitemaps.org/schemas/sitemap/0.9"
5+
elementFormDefault="qualified">
6+
<xsd:annotation>
7+
<xsd:documentation>
8+
XML Schema for Sitemap files.
9+
Last Modified 2008-03-26
10+
</xsd:documentation>
11+
</xsd:annotation>
12+
13+
<xsd:element name="urlset">
14+
<xsd:annotation>
15+
<xsd:documentation>
16+
Container for a set of up to 50,000 document elements.
17+
This is the root element of the XML file.
18+
</xsd:documentation>
19+
</xsd:annotation>
20+
<xsd:complexType>
21+
<xsd:sequence>
22+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
23+
<xsd:element name="url" type="tUrl" maxOccurs="unbounded"/>
24+
</xsd:sequence>
25+
</xsd:complexType>
26+
</xsd:element>
27+
28+
<xsd:complexType name="tUrl">
29+
<xsd:annotation>
30+
<xsd:documentation>
31+
Container for the data needed to describe a document to crawl.
32+
</xsd:documentation>
33+
</xsd:annotation>
34+
<xsd:sequence>
35+
<xsd:element name="loc" type="tLoc"/>
36+
<xsd:element name="lastmod" type="tLastmod" minOccurs="0"/>
37+
<xsd:element name="changefreq" type="tChangeFreq" minOccurs="0"/>
38+
<xsd:element name="priority" type="tPriority" minOccurs="0"/>
39+
<xsd:any namespace="##other" minOccurs="0" maxOccurs="unbounded" processContents="strict"/>
40+
</xsd:sequence>
41+
</xsd:complexType>
42+
43+
<xsd:simpleType name="tLoc">
44+
<xsd:annotation>
45+
<xsd:documentation>
46+
REQUIRED: The location URI of a document.
47+
The URI must conform to RFC 2396 (http://www.ietf.org/rfc/rfc2396.txt).
48+
</xsd:documentation>
49+
</xsd:annotation>
50+
<xsd:restriction base="xsd:anyURI">
51+
<xsd:minLength value="12"/>
52+
<xsd:maxLength value="2048"/>
53+
</xsd:restriction>
54+
</xsd:simpleType>
55+
56+
<xsd:simpleType name="tLastmod">
57+
<xsd:annotation>
58+
<xsd:documentation>
59+
OPTIONAL: The date the document was last modified. The date must conform
60+
to the W3C DATETIME format (http://www.w3.org/TR/NOTE-datetime).
61+
Example: 2005-05-10
62+
Lastmod may also contain a timestamp.
63+
Example: 2005-05-10T17:33:30+08:00
64+
</xsd:documentation>
65+
</xsd:annotation>
66+
<xsd:union>
67+
<xsd:simpleType>
68+
<xsd:restriction base="xsd:date"/>
69+
</xsd:simpleType>
70+
<xsd:simpleType>
71+
<xsd:restriction base="xsd:dateTime"/>
72+
</xsd:simpleType>
73+
</xsd:union>
74+
</xsd:simpleType>
75+
76+
<xsd:simpleType name="tChangeFreq">
77+
<xsd:annotation>
78+
<xsd:documentation>
79+
OPTIONAL: Indicates how frequently the content at a particular URL is
80+
likely to change. The value "always" should be used to describe
81+
documents that change each time they are accessed. The value "never"
82+
should be used to describe archived URLs. Please note that web
83+
crawlers may not necessarily crawl pages marked "always" more often.
84+
Consider this element as a friendly suggestion and not a command.
85+
</xsd:documentation>
86+
</xsd:annotation>
87+
<xsd:restriction base="xsd:string">
88+
<xsd:enumeration value="always"/>
89+
<xsd:enumeration value="hourly"/>
90+
<xsd:enumeration value="daily"/>
91+
<xsd:enumeration value="weekly"/>
92+
<xsd:enumeration value="monthly"/>
93+
<xsd:enumeration value="yearly"/>
94+
<xsd:enumeration value="never"/>
95+
</xsd:restriction>
96+
</xsd:simpleType>
97+
98+
<xsd:simpleType name="tPriority">
99+
<xsd:annotation>
100+
<xsd:documentation>
101+
OPTIONAL: The priority of a particular URL relative to other pages
102+
on the same site. The value for this element is a number between
103+
0.0 and 1.0 where 0.0 identifies the lowest priority page(s).
104+
The default priority of a page is 0.5. Priority is used to select
105+
between pages on your site. Setting a priority of 1.0 for all URLs
106+
will not help you, as the relative priority of pages on your site
107+
is what will be considered.
108+
</xsd:documentation>
109+
</xsd:annotation>
110+
<xsd:restriction base="xsd:decimal">
111+
<xsd:minInclusive value="0.0"/>
112+
<xsd:maxInclusive value="1.0"/>
113+
</xsd:restriction>
114+
</xsd:simpleType>
115+
116+
</xsd:schema>

test/utils/validate-sitemap.ts

Lines changed: 87 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,87 @@
1+
import { XMLParser } from "fast-xml-parser";
2+
import { globSync } from "glob";
3+
import { existsSync, readFileSync, rmSync } from "node:fs";
4+
import { join } from "node:path";
5+
import { validateXML } from "xsd-schema-validator";
6+
import { type Hawk } from "../../lib/core";
7+
8+
/**
 * Generates a sitemap for the sample site and checks it end to end.
 *
 * Steps, in order:
 *  1. switch the working directory to `testSampleRootPath`;
 *  2. ask `hawkInstance` to write the sitemap to `test-sitemap.xml`;
 *  3. read the generated file, then delete it;
 *  4. validate the XML against `sitemap-schema.xsd` (resolved relative to the
 *     sample root, since that is the working directory at this point);
 *  5. compare the number of `<url>` entries with the number of files matched
 *     by the `**\/*.html` glob;
 *  6. confirm every listed URL maps to an existing `.html` file.
 *
 * The original working directory is restored before returning, so the
 * function can be called repeatedly with a relative `testSampleRootPath`.
 *
 * @param testSampleRootPath - Directory holding the sample site and the schema file.
 * @param hawkInstance - Hawk instance used to build the sitemap; its
 *   `configurations.sitemapPath` is overwritten by this function.
 * @returns `true` only when every step above succeeds, otherwise `false`.
 */
export default async function validateSitemap(
  testSampleRootPath: string,
  hawkInstance: Hawk,
): Promise<boolean> {
  const previousWorkingDirectory = process.cwd();
  process.chdir(testSampleRootPath);

  try {
    const lookupPattern = ["**/*.html"];
    const uploadToFTP = false;
    const expectedSitemapOutputPath = "test-sitemap.xml";

    hawkInstance.configurations.sitemapPath = expectedSitemapOutputPath;
    await hawkInstance.utils.makeSitemap(
      lookupPattern,
      [],
      false,
      uploadToFTP,
    );

    // The sitemap must have been written to the configured path.
    if (!existsSync(expectedSitemapOutputPath)) {
      console.log("⚠️ Sitemap not found!");
      return false;
    }

    const sitemapXML: string = readFileSync(expectedSitemapOutputPath, {
      encoding: "utf8",
    });

    // The content is in memory now; remove the generated file right away so
    // no later failure can leave it behind in the sample directory.
    rmSync(expectedSitemapOutputPath, { recursive: true, force: true });

    const sitemapSchemaFile = "sitemap-schema.xsd";

    // validateXML may reject instead of resolving with `valid: false`;
    // treat a rejection as a failed schema check rather than letting it
    // escape as an exception.
    let valid = false;
    try {
      ({ valid } = await validateXML(sitemapXML, sitemapSchemaFile));
    } catch (err: unknown) {
      console.log(err);
    }

    if (!valid) {
      console.log("⚠️ Sitemap failed at schematic test");
      return false;
    }

    // fast-xml-parser yields a plain object (not an array) when an element
    // occurs only once, so normalise `url` to an array before mapping.
    const parsed = new XMLParser().parse(sitemapXML);
    const rawEntries: unknown = parsed?.urlset?.url;
    let entries: { loc: string; lastmod?: string }[];
    if (Array.isArray(rawEntries)) {
      entries = rawEntries;
    } else if (rawEntries == null) {
      entries = [];
    } else {
      entries = [rawEntries as { loc: string; lastmod?: string }];
    }

    const urls = entries.map((entry) => entry.loc);
    const availableRoutes = globSync(lookupPattern);

    // Every html file should be listed exactly once in the sitemap.
    if (availableRoutes.length !== urls.length) {
      console.log(
        `⚠️ Sitemap lists ${urls.length} route(s) but ${availableRoutes.length} html file(s) were found`,
      );
      return false;
    }

    // Finally make sure each listed route points at a real file.
    return _pingRoutes(urls);
  } finally {
    // Undo the chdir so the rest of the test process is unaffected.
    process.chdir(previousWorkingDirectory);
  }
}
70+
71+
/**
 * Checks that every sitemap URL corresponds to an `.html` file under the
 * current working directory.
 *
 * The URL's pathname is percent-decoded (so `/my%20page` matches
 * `my page.html`), a pathname ending in `/` is treated as that directory's
 * `index` page, and `.html` is appended before looking the file up.
 * Every URL without a matching file is logged, not just the first one.
 *
 * @param urls - Absolute URLs taken from the sitemap's `<loc>` entries.
 * @returns `true` when all URLs resolve to existing files (also for an empty
 *   list), otherwise `false`.
 * @throws TypeError if an entry is not a valid absolute URL (from `new URL`).
 */
function _pingRoutes(urls: string[]): boolean {
  const deadRoutes = urls.filter((url: string) => {
    let { pathname } = new URL(url);

    // URL keeps percent-escapes in `pathname`; files on disk use the decoded
    // name. A stray "%" makes decodeURIComponent throw, so fall back to the
    // raw pathname in that case.
    try {
      pathname = decodeURIComponent(pathname);
    } catch {
      /* keep the undecoded pathname */
    }

    // "/" -> "/index", "/docs/" -> "/docs/index"
    if (pathname.endsWith("/")) pathname += "index";

    const filePath = join(process.cwd(), pathname + ".html");

    return !existsSync(filePath);
  });

  for (const url of deadRoutes) {
    console.log(`⚠️ Ping failed on: ${url}`);
  }

  return deadRoutes.length === 0;
}

tsconfig.json

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,5 +37,6 @@
3737
"node_modules",
3838
"test",
3939
"jest.config.ts",
40+
"dist"
4041
]
4142
}

0 commit comments

Comments
 (0)