Skip to content

Commit 05292ac

Browse files
alexcheng1982 authored and markpollack committed
Fix NPE in TextReader source metadata
This commit addresses the NPE in TextReader's source metadata handling. It introduces a new method, getResourceIdentifier(), to robustly extract an identifier from various Resource types. The fix ensures that: (1) the filename is used if available; (2) it falls back to the URI, then the URL, if the filename is not present; and (3) the resource description is used as a last resort. Additionally, the commit includes updated tests to verify the behavior with different Resource types, particularly ByteArrayResource. This change prevents NPEs when dealing with Resources that lack certain properties, improving the overall reliability of TextReader. Fixes spring-projects#1386
1 parent 439934b commit 05292ac

File tree

2 files changed

+92
-4
lines changed

2 files changed

+92
-4
lines changed

spring-ai-core/src/main/java/org/springframework/ai/reader/TextReader.java

Lines changed: 38 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -16,6 +16,8 @@
1616
package org.springframework.ai.reader;
1717

1818
import java.io.IOException;
19+
import java.net.URI;
20+
import java.net.URL;
1921
import java.nio.charset.Charset;
2022
import java.nio.charset.StandardCharsets;
2123
import java.util.HashMap;
@@ -45,11 +47,11 @@ public class TextReader implements DocumentReader {
4547
private final Resource resource;
4648

4749
/**
48-
* @return Character set to be used when loading data from the
50+
* Character set to be used when loading data from the
4951
*/
5052
private Charset charset = StandardCharsets.UTF_8;
5153

52-
private Map<String, Object> customMetadata = new HashMap<>();
54+
private final Map<String, Object> customMetadata = new HashMap<>();
5355

5456
public TextReader(String resourceUrl) {
5557
this(new DefaultResourceLoader().getResource(resourceUrl));
@@ -86,6 +88,7 @@ public List<Document> get() {
8688
// Inject source information as a metadata.
8789
this.customMetadata.put(CHARSET_METADATA, this.charset.name());
8890
this.customMetadata.put(SOURCE_METADATA, this.resource.getFilename());
91+
this.customMetadata.put(SOURCE_METADATA, getResourceIdentifier(this.resource));
8992

9093
return List.of(new Document(document, this.customMetadata));
9194

@@ -95,4 +98,37 @@ public List<Document> get() {
9598
}
9699
}
97100

101+
/**
 * Picks a string that identifies the given resource, for use as source metadata.
 * Candidates are tried in order: a non-empty filename, the URI, the URL, and finally
 * the resource description. An IOException thrown by getURI() or getURL() is
 * deliberately ignored so that the next candidate is tried.
 * @param resource the resource to identify
 * @return the first available identifier among the candidates listed above
 */
protected String getResourceIdentifier(Resource resource) {
102+
// Try to get the filename first
103+
String filename = resource.getFilename();
104+
if (filename != null && !filename.isEmpty()) {
105+
return filename;
106+
}
107+
108+
// Try to get the URI
109+
try {
110+
URI uri = resource.getURI();
111+
if (uri != null) {
112+
return uri.toString();
113+
}
114+
}
115+
catch (IOException ignored) {
116+
// If getURI() throws an exception, we'll try the next method
117+
}
118+
119+
// Try to get the URL
120+
try {
121+
URL url = resource.getURL();
122+
if (url != null) {
123+
return url.toString();
124+
}
125+
}
126+
catch (IOException ignored) {
127+
// If getURL() throws an exception, we'll fall back to getDescription()
128+
}
129+
130+
// If all else fails, use the description
131+
return resource.getDescription();
132+
}
133+
98134
}

spring-ai-core/src/test/java/org/springframework/ai/reader/TextReaderTests.java

Lines changed: 54 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,26 +15,33 @@
1515
*/
1616
package org.springframework.ai.reader;
1717

18+
import java.io.File;
19+
import java.io.IOException;
20+
import java.net.URI;
21+
import java.net.URL;
22+
import java.nio.charset.StandardCharsets;
1823
import java.util.List;
1924

2025
import org.junit.jupiter.api.Test;
2126

2227
import org.springframework.ai.document.Document;
2328
import org.springframework.ai.transformer.splitter.TokenTextSplitter;
29+
import org.springframework.core.io.ByteArrayResource;
2430
import org.springframework.core.io.DefaultResourceLoader;
31+
import org.springframework.core.io.FileSystemResource;
2532
import org.springframework.core.io.Resource;
2633

2734
import static org.assertj.core.api.Assertions.assertThat;
2835

2936
/**
3037
* @author Christian Tzolov
38+
* @author Mark Pollack
3139
*/
3240
public class TextReaderTests {
3341

34-
private Resource resource = new DefaultResourceLoader().getResource("classpath:text_source.txt");
35-
3642
@Test
3743
void loadText() {
44+
Resource resource = new DefaultResourceLoader().getResource("classpath:text_source.txt");
3845
assertThat(resource).isNotNull();
3946
TextReader textReader = new TextReader(resource);
4047
textReader.getCustomMetadata().put("customKey", "Value");
@@ -53,4 +60,49 @@ void loadText() {
5360
}
5461
}
5562

63+
/**
 * Verifies that TextReader yields exactly one document from a ByteArrayResource and
 * that SOURCE_METADATA holds the expected "Byte array resource [...]" string, both for
 * a resource built from bytes only and for one built with a custom description.
 * Custom metadata, charset metadata and document content are checked as well.
 */
@Test
64+
void loadTextFromByteArrayResource() {
65+
// Test with the bytes-only ByteArrayResource constructor (no custom description)
66+
Resource defaultByteArrayResource = new ByteArrayResource("Test content".getBytes(StandardCharsets.UTF_8));
67+
assertThat(defaultByteArrayResource).isNotNull();
68+
TextReader defaultTextReader = new TextReader(defaultByteArrayResource);
69+
defaultTextReader.getCustomMetadata().put("customKey", "DefaultValue");
70+
71+
List<Document> defaultDocuments = defaultTextReader.get();
72+
73+
assertThat(defaultDocuments).hasSize(1);
74+
75+
Document defaultDocument = defaultDocuments.get(0);
76+
assertThat(defaultDocument.getMetadata()).containsEntry("customKey", "DefaultValue")
77+
.containsEntry(TextReader.CHARSET_METADATA, "UTF-8");
78+
79+
// Assert on the SOURCE_METADATA for default ByteArrayResource
80+
assertThat(defaultDocument.getMetadata().get(TextReader.SOURCE_METADATA))
81+
.isEqualTo("Byte array resource [resource loaded from byte array]");
82+
83+
assertThat(defaultDocument.getContent()).isEqualTo("Test content");
84+
85+
// Test with custom description constructor
86+
String customDescription = "Custom byte array resource";
87+
Resource customByteArrayResource = new ByteArrayResource(
88+
"Another test content".getBytes(StandardCharsets.UTF_8), customDescription);
89+
assertThat(customByteArrayResource).isNotNull();
90+
TextReader customTextReader = new TextReader(customByteArrayResource);
91+
customTextReader.getCustomMetadata().put("customKey", "CustomValue");
92+
93+
List<Document> customDocuments = customTextReader.get();
94+
95+
assertThat(customDocuments).hasSize(1);
96+
97+
Document customDocument = customDocuments.get(0);
98+
assertThat(customDocument.getMetadata()).containsEntry("customKey", "CustomValue")
99+
.containsEntry(TextReader.CHARSET_METADATA, "UTF-8");
100+
101+
// Assert on the SOURCE_METADATA for custom ByteArrayResource
102+
assertThat(customDocument.getMetadata().get(TextReader.SOURCE_METADATA))
103+
.isEqualTo("Byte array resource [Custom byte array resource]");
104+
105+
assertThat(customDocument.getContent()).isEqualTo("Another test content");
106+
}
107+
56108
}

0 commit comments

Comments
 (0)