Skip to content
Merged
Show file tree
Hide file tree
Changes from 193 commits
Commits
Show all changes
212 commits
Select commit Hold shift + click to select a range
b367cb0
Fix out of date docker-py deps
dagardner-nv Jun 2, 2025
1806914
Work-around for #2219
dagardner-nv Jun 2, 2025
e81702a
Update conda deps
dagardner-nv Jun 2, 2025
d882215
Adding new deps from Tad
dagardner-nv Jun 2, 2025
658648c
Adding new deps from Tad
dagardner-nv Jun 2, 2025
9ff316f
Resolve dependency conflicts, add missing dep for yaml (currently in …
dagardner-nv Jun 2, 2025
94c577d
WIP
dagardner-nv Jun 2, 2025
d962de0
Adding cr header
dagardner-nv Jun 2, 2025
ef664bf
WIP
dagardner-nv Jun 3, 2025
d6f9920
Update datasets and huggingface_hub libraries to match requirements o…
dagardner-nv Jun 3, 2025
03f9bdb
Pin click and setuptools
dagardner-nv Jun 3, 2025
89abcbd
Be more specific with setuptools versions
dagardner-nv Jun 3, 2025
6da41ee
Updae conda envs
dagardner-nv Jun 3, 2025
a1f0cd3
WIP
dagardner-nv Jun 3, 2025
711254f
Merge branch 'david-dep-issues' of github.com:dagardner-nv/Morpheus i…
dagardner-nv Jun 3, 2025
060d9e6
Don't pin the model to device 1
dagardner-nv Jun 3, 2025
da74c07
Just use the std lib re
dagardner-nv Jun 3, 2025
be47c08
Remove eval and unused imports
dagardner-nv Jun 3, 2025
a6fbcee
Remove unneeded bit
dagardner-nv Jun 3, 2025
f5bfa4f
Remove unused dep
dagardner-nv Jun 3, 2025
de03d39
Add set of regular expressions
dagardner-nv Jun 3, 2025
04a6201
Add module dir
dagardner-nv Jun 3, 2025
81d6499
Remove unused modules
dagardner-nv Jun 3, 2025
d2f77c8
Don't print all the results
dagardner-nv Jun 3, 2025
509fe3a
WIP
dagardner-nv Jun 4, 2025
ff59da7
datasets is now a CLI flag
dagardner-nv Jun 4, 2025
086044a
Source stage for pulling data from huggingface
dagardner-nv Jun 4, 2025
e51fae1
Refactor DLPInputProcessor as a Morpheus stage
dagardner-nv Jun 4, 2025
ec9e75a
First pass at refactoring RegexProcessor as a stage
dagardner-nv Jun 4, 2025
fd4fb6d
Make num_samples a flag
dagardner-nv Jun 4, 2025
5fe1bc8
Fix syntax
dagardner-nv Jun 4, 2025
5196df5
Fixes
dagardner-nv Jun 4, 2025
6c464f8
WIP
dagardner-nv Jun 4, 2025
a6b950a
Switch to processing records one row at a time, using one of the cudf…
dagardner-nv Jun 4, 2025
8ce9072
First pass at a gliner stage
dagardner-nv Jun 4, 2025
f6059e9
Remove unused import
dagardner-nv Jun 5, 2025
7f38ddd
Switch to applying the regex on a per-row basis, as this allows us to…
dagardner-nv Jun 5, 2025
1cd00da
First pass at refactoring RiskScorer as a stage
dagardner-nv Jun 5, 2025
98fedca
Fix type hint
dagardner-nv Jun 5, 2025
2772bb3
WIP
dagardner-nv Jun 5, 2025
3779e9a
Cleanup
dagardner-nv Jun 5, 2025
e6580c8
Set max_model_length
dagardner-nv Jun 5, 2025
6c84ead
Remove model_max_length, as it was not working it appears to be a kno…
dagardner-nv Jun 5, 2025
e090ca5
Flatten the scores output
dagardner-nv Jun 5, 2025
9c01359
Minor improvements
dagardner-nv Jun 5, 2025
9e9adfb
Rename gliner_findings to dlp_findings
dagardner-nv Jun 5, 2025
668850d
Fix setting fo GpuAndCpuMixin
dagardner-nv Jun 5, 2025
8b29060
Run the pipeline in CPU execution mode
dagardner-nv Jun 5, 2025
0dbb14d
Remove unused import
dagardner-nv Jun 5, 2025
6db058f
Work-around what appears to be a bug in the serialization stage
dagardner-nv Jun 5, 2025
d6fed72
Remove temporary work-around
dagardner-nv Jun 5, 2025
3abc552
Fix spelling errors, restructure readme
dagardner-nv Jun 5, 2025
5a5394b
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jun 5, 2025
cd882be
Revert unintentional changes
dagardner-nv Jun 5, 2025
9fde32e
Lint fixes and other cleanups
dagardner-nv Jun 5, 2025
2dab666
Update README.md
tzemicheal Jun 5, 2025
a62e58f
Add CR header
dagardner-nv Jun 5, 2025
dabea93
Merge branch 'david-tzm-dlp' of github.com:dagardner-nv/Morpheus into…
dagardner-nv Jun 5, 2025
2fb2a5e
Add preallocations
dagardner-nv Jun 5, 2025
67f3c72
Cleanup monitor stage labels
dagardner-nv Jun 5, 2025
e3ebb9e
Optimization to handle situation where chunking isn't used
dagardner-nv Jun 5, 2025
e856bf8
Fix handling of output file path
dagardner-nv Jun 6, 2025
944d484
Batch process data
dagardner-nv Jun 6, 2025
50f7890
Lazily load the model
dagardner-nv Jun 6, 2025
54af433
Remove unused import
dagardner-nv Jun 6, 2025
7785d5c
Install gliner from pip, ensuring we don't accidentally install a cpu…
dagardner-nv Jun 6, 2025
b73d575
Document the need to install torch by hand on Arm
dagardner-nv Jun 6, 2025
be14b32
pin to 0.2.19, 2.20 isn't working with our version of torch
dagardner-nv Jun 6, 2025
9448db5
Switch to updated gliner, and specify cache dir
dagardner-nv Jun 6, 2025
903c5fa
Switch to performing regexes in cudf
dagardner-nv Jun 9, 2025
12c9be9
Switch to using cudf regex
dagardner-nv Jun 9, 2025
dcfbdbb
Exclude the privacy mask by default, enabled with flag
dagardner-nv Jun 9, 2025
246125d
Switch to performing a pandas apply
dagardner-nv Jun 9, 2025
0dac664
Remove the PreallocatorMixin from the DLPInputProcessor stage
dagardner-nv Jun 9, 2025
468bf9d
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jun 10, 2025
66b766c
Optionally use an input file, optionally repeat the input data
dagardner-nv Jun 10, 2025
f1842ae
Add triton inference processor
tzemicheal Jun 10, 2025
ffada50
Merge branch 'tz-david-tzm-dlp' of github.com:tzemicheal/Morpheus int…
dagardner-nv Jun 10, 2025
bf1b51c
Triton code as-is from Tad
dagardner-nv Jun 11, 2025
e6e552a
Lazily load the model, construct the client once in the constructor
dagardner-nv Jun 11, 2025
04b2aea
Add License headers
dagardner-nv Jun 11, 2025
77791f9
Use grpc
dagardner-nv Jun 11, 2025
cc68730
Ensure that post processing happens on the GPU
dagardner-nv Jun 11, 2025
5161d67
Merge pull request #11 from dagardner-nv/david-tzm-dlp-tz-triton
dagardner-nv Jun 11, 2025
9660526
Use a list of tuples
dagardner-nv Jun 11, 2025
e02b8ca
Misc cleanups, drop usage of onnx locally as this casuses the model t…
dagardner-nv Jun 11, 2025
e5973b7
WIP
dagardner-nv Jun 11, 2025
499af04
Cleanup
dagardner-nv Jun 11, 2025
8ecb9bf
Merge branch 'david-tzm-dlp' of github.com:dagardner-nv/Morpheus into…
dagardner-nv Jun 11, 2025
e3382f5
Switch to async
dagardner-nv Jun 11, 2025
37d2e7d
Merge branch 'david-tzm-dlp-tz-triton' into david-tzm-dlp
dagardner-nv Jun 11, 2025
a6b09f2
Lint fix
dagardner-nv Jun 11, 2025
ece50f8
Remove unused model_cache_dir
dagardner-nv Jun 11, 2025
fd69b3b
Lint fix
dagardner-nv Jun 11, 2025
fdea060
Make server_url a cli flag
dagardner-nv Jun 11, 2025
4cbbe47
Remove the need for a second loop
dagardner-nv Jun 11, 2025
bbb1fde
Clean up type hints
dagardner-nv Jun 12, 2025
192ef01
Expose command line flags to enable chunking
dagardner-nv Jun 12, 2025
183429b
Fix requesting needed columns
dagardner-nv Jun 12, 2025
17e4e98
Replace broken chunking feature with a split on new-line chars
dagardner-nv Jun 12, 2025
a5c2159
Always split on paragraphs, don't fallback, filter non-matched rows (…
dagardner-nv Jun 13, 2025
ca19410
Lint fixes
dagardner-nv Jun 13, 2025
9d3b630
Aggregate data by the original index
dagardner-nv Jun 13, 2025
c15e31b
Adjust weights to match the labels
dagardner-nv Jun 13, 2025
58e86de
Remove weights not in the labels
dagardner-nv Jun 13, 2025
f82a429
Remove unused import, adjust weight calculations per TZM
dagardner-nv Jun 13, 2025
ebfba25
Fix the calculations of scores
dagardner-nv Jun 13, 2025
8ffb846
Better handling of output columns
dagardner-nv Jun 13, 2025
10a7768
Add a --regex_only flag
dagardner-nv Jun 14, 2025
eb618d3
Run the scorer for regex only
dagardner-nv Jun 16, 2025
8120bbd
Handle the findings from regex only
dagardner-nv Jun 16, 2025
74787aa
Fix handling of regex labels
dagardner-nv Jun 16, 2025
119d0f2
Relocate the mode to the models dir
dagardner-nv Jun 16, 2025
78ff92b
Update README to include triton instructions
dagardner-nv Jun 16, 2025
9a6da64
Moving files to LFS
dagardner-nv Jun 16, 2025
58f4a5c
Move json files to LFS
dagardner-nv Jun 16, 2025
c883e51
Include information about fetching the model with git lfs
dagardner-nv Jun 16, 2025
c373d2f
Update to no longer use the pytorch model using only the onnx model
dagardner-nv Jun 16, 2025
2b3c791
Ensure a CUDA enabled version of onnxruntime is installed, install gl…
dagardner-nv Jun 16, 2025
9ca61b9
DLPInputProcessor is now responsible for converting from MessageMeta …
dagardner-nv Jun 16, 2025
5917ea2
WIP
dagardner-nv Jun 24, 2025
3223073
WIP
dagardner-nv Jun 24, 2025
10b40a3
Shelving this for a while, the current cpp impl is somehow resulting …
dagardner-nv Jun 24, 2025
adba646
WIP
dagardner-nv Jun 24, 2025
e896dfb
Add pipeline batch size
dagardner-nv Jun 25, 2025
3ca6548
Merge branch 'david-tzm-dlp' into david-tzm-dlp-cpp-regex
dagardner-nv Jun 25, 2025
fb03e8a
Fix building AST tree
dagardner-nv Jun 25, 2025
ff34b9d
Fix label concatenation
dagardner-nv Jun 25, 2025
f25e291
Log the df length
dagardner-nv Jun 25, 2025
25fc535
Time the entire run, not a single call
dagardner-nv Jun 25, 2025
d739df0
David tzm dlp cpp regex (#13)
dagardner-nv Jun 25, 2025
845e114
Remove debug printing
dagardner-nv Jun 25, 2025
090d59a
Ugh LFS
dagardner-nv Jun 25, 2025
9d30c98
Merge branch 'david-tzm-dlp-cpp-regex' into david-tzm-dlp
dagardner-nv Jun 25, 2025
8deda9a
WIP
dagardner-nv Jun 25, 2025
b540d95
Remove redundant patterns
dagardner-nv Jun 25, 2025
ef4b42c
Remove redundant patterns
dagardner-nv Jun 25, 2025
f4f3161
Remove old work-around, and print statements
dagardner-nv Jun 25, 2025
c229146
Clean up the timing code
dagardner-nv Jun 25, 2025
bc4ae45
Remove more redundant regexes
dagardner-nv Jun 25, 2025
2e2d492
Remove more redundant regexes
dagardner-nv Jun 25, 2025
0f29382
Use apply rather than iterating over groups
dagardner-nv Jun 25, 2025
433f30c
Remove unused import
dagardner-nv Jun 26, 2025
fc662a7
Revert unintentional change
dagardner-nv Jun 26, 2025
0d3379b
Revert temporary timing code
dagardner-nv Jun 26, 2025
7d0385b
Remove timing code
dagardner-nv Jun 26, 2025
10a5566
Remove timing code
dagardner-nv Jun 26, 2025
902c1f0
Remove scripts from LFS
dagardner-nv Jun 26, 2025
3b05df3
Adjust LFS matching
dagardner-nv Jun 26, 2025
29d3aed
Add scripts back in
dagardner-nv Jun 26, 2025
7c4fb14
Remove debug stage
dagardner-nv Jun 26, 2025
9a929c1
Rename extension to conform with Morpheus naming
dagardner-nv Jun 26, 2025
d7fd972
IWYU fixes
dagardner-nv Jun 26, 2025
988520f
Remove nervaluate
dagardner-nv Jun 26, 2025
1140411
Remove unused parameter
dagardner-nv Jun 26, 2025
a4da5ae
Add num_threads flag
dagardner-nv Jun 26, 2025
8869774
Add docstrings to RiskScorer
dagardner-nv Jun 26, 2025
0391c38
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jun 27, 2025
d2290a1
Fix serializing to JSON for dataframes when they contain a struct field
dagardner-nv Jun 30, 2025
bbc8279
Remove redundant for-loop
dagardner-nv Jun 30, 2025
a719a66
Replace custom DLP output stages with Morpheus built-in stages
dagardner-nv Jun 30, 2025
610c242
Remove un-needed DLP output stages
dagardner-nv Jun 30, 2025
8c644ee
Make get_data public and avoid making copies of TableInfoData
dagardner-nv Jun 30, 2025
f3e024b
IWYU fixes
dagardner-nv Jun 30, 2025
88f831f
Fix gramerical error in error message
dagardner-nv Jun 30, 2025
0296bcb
Remove redundant code
dagardner-nv Jun 30, 2025
1e8bb34
Rename variable
dagardner-nv Jun 30, 2025
6bfd9ba
Remove unneeded monitor stage
dagardner-nv Jun 30, 2025
9987117
Remove redundant loop
dagardner-nv Jun 30, 2025
3ffe3f1
Revert "Remove redundant loop"
dagardner-nv Jun 30, 2025
5edaf2b
Cleanup pull the if statement out of the loop
dagardner-nv Jun 30, 2025
f8d177c
Add missing else clause for Paquet
dagardner-nv Jul 1, 2025
3ac6a7d
Add missing case statement for Parquet
dagardner-nv Jul 1, 2025
5288b9d
WIP
dagardner-nv Jul 1, 2025
438acd0
Handle index columns
dagardner-nv Jul 1, 2025
da98af1
Add triton repo configs for gliner model
dagardner-nv Jul 1, 2025
9ac3ffa
Slim down the input processor stage
dagardner-nv Jul 1, 2025
0755343
Combine the two replace statements
dagardner-nv Jul 1, 2025
2bb3228
WIP
dagardner-nv Jul 2, 2025
b1d803c
Revert "WIP"
dagardner-nv Jul 2, 2025
0a3b9c5
Specify no capture
dagardner-nv Jul 2, 2025
25b39fc
Add missing include
dagardner-nv Jul 2, 2025
87a33ab
Don't emit an empty table
dagardner-nv Jul 7, 2025
c635639
Update examples/data_loss_prevention/dlp_stages/_lib/CMakeLists.txt
dagardner-nv Jul 7, 2025
47255df
Replace explicit version pins with version ranges
dagardner-nv Jul 7, 2025
0eedd6f
Remove timing code, document default values, remove restriction on NL…
dagardner-nv Jul 7, 2025
4ebd4e0
Document default values in the docstring
dagardner-nv Jul 7, 2025
1fa2bb4
Add docstrings
dagardner-nv Jul 7, 2025
0ed70f1
Add default values to docstrings
dagardner-nv Jul 7, 2025
870ba30
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jul 7, 2025
674adaa
Add round-trip test for write_df_to_file/read_file_to_df and include …
dagardner-nv Jul 8, 2025
c43a58c
Open parquet files as binary, add optional include_index_col arg for …
dagardner-nv Jul 8, 2025
bf12770
Add new tests
dagardner-nv Jul 8, 2025
792b302
Add unittest for the new get_column override
dagardner-nv Jul 8, 2025
7864d53
IWYU fixes
dagardner-nv Jul 8, 2025
fd502d7
Merge branch 'branch-25.06' of github.com:nv-morpheus/Morpheus into d…
dagardner-nv Jul 8, 2025
5a0d40e
Support single regex pattern
dagardner-nv Jul 8, 2025
4c589bb
replace print with logger
dagardner-nv Jul 8, 2025
88b1f71
Apply suggestions from code review
dagardner-nv Jul 8, 2025
e9368de
Address PR feedback
dagardner-nv Jul 9, 2025
fa5f3d4
Remove --include_privacy_masks flag as this isn't supported in the pi…
dagardner-nv Jul 9, 2025
b4e0de6
Merge branch 'david-tzm-dlp' of github.com:dagardner-nv/Morpheus into…
dagardner-nv Jul 9, 2025
6f73723
Add comment explanation
dagardner-nv Jul 9, 2025
4a4b466
Remove unused logger
dagardner-nv Jul 9, 2025
6e7861d
Remove unneeded assert
dagardner-nv Jul 9, 2025
bfd9554
Remove early/unneeded variable assignment
dagardner-nv Jul 9, 2025
a320b9d
Remove old docstring
dagardner-nv Jul 9, 2025
9687226
Update README.md of HC PR feedback
tzemicheal Jul 9, 2025
349056b
Move max_score to a class var
dagardner-nv Jul 9, 2025
c75bb89
Merge branch 'david-tzm-dlp' of github.com:dagardner-nv/Morpheus into…
dagardner-nv Jul 9, 2025
dabf1a7
Fix allignment
dagardner-nv Jul 9, 2025
80ba916
Adjust regex to match missing row
dagardner-nv Jul 9, 2025
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions ci/vale/styles/config/vocabularies/morpheus/accept.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,6 +71,7 @@ pytest
[Ss]ubclassing
[Ss]ubcard(s?)
[Ss]ubgraph(s?)
[Ss]ubnet(s?)
[Ss]ubword(s?)
[Ss]uperset(s?)
[Tt]imestamp(s?)
Expand Down
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ dependencies:
- cxx-compiler
- cython=3.0
- datacompy=0.13.1
- datasets=3.6
- dill=0.3.7
- docker-py>=7.1,<8
- doxygen=1.9.2
Expand All @@ -52,7 +53,7 @@ dependencies:
- gtest=1.14
- gxx=12.1
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- huggingface_hub>=0.24,<1.0
- indicators=2.3
- ipython
- isort
Expand All @@ -79,6 +80,7 @@ dependencies:
- numexpr
- numpydoc=1.5
- onnx=1.15
- onnxruntime=1.22=*cuda
- openai==1.13.*
- papermill=2.6.0
- pip
Expand Down Expand Up @@ -114,14 +116,15 @@ dependencies:
- sqlalchemy<2.0
- sysroot_linux-64>=2.28
- tqdm=4
- transformers=4.36.2
- transformers>=4.40,<5.0
- tritonclient=2.34
- typing_utils=0.1
- urllib3>=2.3,<3
- versioneer
- versioneer-518
- watchdog=3.0
- websockets
- yaml=0.2
- yapf=0.43
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
Expand Down
7 changes: 5 additions & 2 deletions conda/environments/all_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ dependencies:
- cxx-compiler
- cython=3.0
- datacompy=0.13.1
- datasets=3.6
- dill=0.3.7
- docker-py>=7.1,<8
- doxygen=1.9.2
Expand All @@ -52,7 +53,7 @@ dependencies:
- gtest=1.14
- gxx=12.1
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- huggingface_hub>=0.24,<1.0
- include-what-you-use=0.20
- indicators=2.3
- ipython
Expand Down Expand Up @@ -81,6 +82,7 @@ dependencies:
- numexpr
- numpydoc=1.5
- onnx=1.15
- onnxruntime=1.22=*cuda
- openai==1.13.*
- papermill=2.6.0
- pip
Expand Down Expand Up @@ -117,7 +119,7 @@ dependencies:
- sqlalchemy<2.0
- sysroot_linux-64>=2.28
- tqdm=4
- transformers=4.36.2
- transformers>=4.40,<5.0
- tritonclient=2.34
- typing_utils=0.1
- urllib3>=2.3,<3
Expand All @@ -128,6 +130,7 @@ dependencies:
- versioneer-518
- watchdog=3.0
- websockets
- yaml=0.2
- yapf=0.43
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
Expand Down
1 change: 1 addition & 0 deletions conda/environments/dev_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -103,6 +103,7 @@ dependencies:
- versioneer-518
- watchdog=3.0
- websockets
- yaml=0.2
- yapf=0.43
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
Expand Down
1 change: 1 addition & 0 deletions conda/environments/dev_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -108,6 +108,7 @@ dependencies:
- versioneer-518
- watchdog=3.0
- websockets
- yaml=0.2
- yapf=0.43
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
Expand Down
7 changes: 5 additions & 2 deletions conda/environments/examples_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ dependencies:
- cuml=25.02.*
- cupy<13.4
- datacompy=0.13.1
- datasets=3.6
- dill=0.3.7
- docker-py>=7.1,<8
- elasticsearch==8.9.0
- feedparser=6.0
- grpcio
- grpcio-status
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- huggingface_hub>=0.24,<1.0
- jsonpatch>=1.33
- kfp
- libwebp=1.3.2
Expand All @@ -38,6 +39,7 @@ dependencies:
- numexpr
- numpydoc=1.5
- onnx=1.15
- onnxruntime=1.22=*cuda
- openai==1.13.*
- papermill=2.6.0
- pip
Expand All @@ -57,12 +59,13 @@ dependencies:
- scikit-learn=1.3.2
- sqlalchemy<2.0
- tqdm=4
- transformers=4.36.2
- transformers>=4.40,<5.0
- tritonclient=2.34
- typing_utils=0.1
- urllib3>=2.3,<3
- watchdog=3.0
- websockets
- yaml=0.2
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
- --find-links https://data.dgl.ai/wheels/torch-2.3/repo.html
Expand Down
7 changes: 5 additions & 2 deletions conda/environments/examples_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -19,14 +19,15 @@ dependencies:
- cuml=25.02.*
- cupy<13.4
- datacompy=0.13.1
- datasets=3.6
- dill=0.3.7
- docker-py>=7.1,<8
- elasticsearch==8.9.0
- feedparser=6.0
- grpcio
- grpcio-status
- httpx>=0.23,<0.28
- huggingface_hub=0.20.2
- huggingface_hub>=0.24,<1.0
- jsonpatch>=1.33
- kfp
- libwebp=1.3.2
Expand All @@ -39,6 +40,7 @@ dependencies:
- numexpr
- numpydoc=1.5
- onnx=1.15
- onnxruntime=1.22=*cuda
- openai==1.13.*
- papermill=2.6.0
- pip
Expand All @@ -59,12 +61,13 @@ dependencies:
- scikit-learn=1.3.2
- sqlalchemy<2.0
- tqdm=4
- transformers=4.36.2
- transformers>=4.40,<5.0
- tritonclient=2.34
- typing_utils=0.1
- urllib3>=2.3,<3
- watchdog=3.0
- websockets
- yaml=0.2
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
- --find-links https://data.dgl.ai/wheels/torch-2.3/repo.html
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/model-utils_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ dependencies:
- cuml=25.02.*
- jupyterlab
- matplotlib
- onnx
- onnx=1.15
- pandas
- pip
- python=3.12
- scikit-learn=1.3.2
- seaborn
- seqeval=1.2.2
- transformers=4.36.2
- transformers>=4.40,<5.0
- xgboost
- pip:
- tensorrt-cu12
Expand Down
4 changes: 2 additions & 2 deletions conda/environments/model-utils_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -12,14 +12,14 @@ dependencies:
- cuml=25.02.*
- jupyterlab
- matplotlib
- onnx
- onnx=1.15
- pandas
- pip
- python=3.12
- scikit-learn=1.3.2
- seaborn
- seqeval=1.2.2
- transformers=4.36.2
- transformers>=4.40,<5.0
- xgboost
- pip:
- tensorrt-cu12
Expand Down
1 change: 1 addition & 0 deletions conda/environments/runtime_cuda-128_arch-aarch64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies:
- urllib3>=2.3,<3
- watchdog=3.0
- websockets
- yaml=0.2
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
Expand Down
1 change: 1 addition & 0 deletions conda/environments/runtime_cuda-128_arch-x86_64.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ dependencies:
- urllib3>=2.3,<3
- watchdog=3.0
- websockets
- yaml=0.2
- pip:
- --extra-index-url https://download.pytorch.org/whl/cu124
- databricks-cli < 0.100
Expand Down
19 changes: 15 additions & 4 deletions dependencies.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ files:
- development
- docs
- example-dfp-prod
- example-dlp
- example-gnn
- example-llms
- python
Expand Down Expand Up @@ -152,6 +153,7 @@ files:
- cve-mitigation
- example-abp-nvsmi
- example-dfp-prod
- example-dlp
- example-gnn
- example-llms
- python
Expand Down Expand Up @@ -418,6 +420,7 @@ dependencies:
- urllib3>=2.3,<3
- watchdog=3.0
- websockets
- yaml=0.2
- pip
- pip:
- &torch-extra-index --extra-index-url https://download.pytorch.org/whl/cu124
Expand Down Expand Up @@ -559,14 +562,14 @@ dependencies:
common:
- output_types: [conda]
packages:
- &transformers transformers=4.36.2 # newer versions are incompatible with our pinned version of huggingface_hub
- &transformers transformers>=4.40,<5.0
- anyio>=3.7
- arxiv=1.4
- httpx>=0.23,<0.28 # work-around for https://github.com/openai/openai-python/issues/1915
- huggingface_hub=0.20.2 # work-around for https://github.com/UKPLab/sentence-transformers/issues/1762
- huggingface_hub>=0.24,<1.0
- jsonpatch>=1.33
- numexpr
- onnx=1.15
- &onnx onnx=1.15
- *openai
- pypdf=3.17.4
- *python-docx
Expand Down Expand Up @@ -606,7 +609,7 @@ dependencies:
- scikit-learn=1.3.2
- jupyterlab
- matplotlib
- onnx
- *onnx
- pandas
- seaborn
- seqeval=1.2.2
Expand All @@ -620,3 +623,11 @@ dependencies:
- output_types: [conda]
packages:
- libwebp=1.3.2 # Required for CVE mitigation: https://nvd.nist.gov/vuln/detail/CVE-2023-4863

example-dlp:
common:
- output_types: [conda]
packages:
- datasets=3.6
- onnxruntime=1.22=*cuda
# gliner intentionally omitted due to https://github.com/urchade/GLiNER/issues/267
1 change: 1 addition & 0 deletions examples/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -17,5 +17,6 @@ list(APPEND CMAKE_MESSAGE_CONTEXT "examples")
list(PREPEND CMAKE_PREFIX_PATH "${PROJECT_BINARY_DIR}")

add_subdirectory(developer_guide)
add_subdirectory(data_loss_prevention)

list(POP_BACK CMAKE_MESSAGE_CONTEXT)
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,7 @@ limitations under the License.
## Environments
Morpheus supports multiple environments, each intended to support a given use-case. Each example documents which environments it is able to run in. With the exception of the Morpheus Release Container, the examples require fetching both the `datasets` and `examples` datasets via the `fetch_data.py` script:
```bash
git lfs install
./scripts/fetch_data.py fetch examples datasets
```

Expand Down
79 changes: 79 additions & 0 deletions examples/data_loss_prevention/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,79 @@
# SPDX-FileCopyrightText: Copyright (c) 2022-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# SPDX-License-Identifier: Apache-2.0
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

# Project preamble for the data-loss-prevention (DLP) example: declares the minimum CMake version,
# the shared cache directory, the package search path, the project itself and the global
# compiler / RPATH defaults applied to targets created below.
cmake_minimum_required(VERSION 3.25 FATAL_ERROR)

# Tag messages emitted while processing this directory with the "dlp" context.
# The matching list(POP_BACK ...) is at the end of this file.
list(APPEND CMAKE_MESSAGE_CONTEXT "dlp")

# Set the cache to be the same to allow for CCache to be used effectively.
# CMAKE_SOURCE_DIR is the top of the whole build tree, so the default location follows the
# top-level project. This is a non-FORCE cache entry, so a value already in the cache wins.
set(MORPHEUS_CACHE_DIR "${CMAKE_SOURCE_DIR}/.cache" CACHE PATH "Directory to contain all CPM and CCache data")
mark_as_advanced(MORPHEUS_CACHE_DIR)

# Add the Conda environment to the prefix path and add the CMake files.
# CONDA_PREFIX is read from the environment at configure time.
list(PREPEND CMAKE_PREFIX_PATH "$ENV{CONDA_PREFIX}")

project(dlp
VERSION 25.06.00
LANGUAGES C CXX
)

# Global defaults picked up by targets created after this point:
# C++20 is mandatory, compiler extensions are allowed, and code is built position-independent.
set(CMAKE_CXX_STANDARD 20)
set(CMAKE_CXX_STANDARD_REQUIRED ON)
set(CMAKE_CXX_EXTENSIONS ON)
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
# Installed binaries get an RPATH of "$ORIGIN" (their own directory) plus the directories of the
# libraries they were linked against.
set(CMAKE_INSTALL_RPATH_USE_LINK_PATH TRUE)
set(CMAKE_INSTALL_RPATH "$ORIGIN")

# Set the option prefix to match the outer project before including. Must be before find_package(morpheus)
set(OPTION_PREFIX "MORPHEUS")

# Opt in to CMP0144 so find_package() honours upper-case <PACKAGENAME>_ROOT variables, which
# avoids the warning about MORPHEUS_ROOT being set.
# CMP0144 was introduced in CMake 3.27 but this file only requires 3.25; setting a policy that the
# running CMake does not know is a hard error, so only set it when it exists.
if(POLICY CMP0144)
  cmake_policy(SET CMP0144 NEW)
endif()

# Locate the Morpheus package; both packages are mandatory for this example.
find_package(morpheus REQUIRED)
find_package(glog REQUIRED) # work-around for #2149

# Configure the build helpers and declare the `dlp_stages` python package.
# The morpheus_utils_* / rapids_* commands are not defined in this file.
# NOTE(review): presumably they are provided by find_package(morpheus) — confirm.

# Receives the *name* of the cache-directory variable defined near the top of this file.
morpheus_utils_initialize_cpm(MORPHEUS_CACHE_DIR)

# Ensure CPM is initialized
rapids_cpm_init()

morpheus_utils_python_configure()

# CUDA toolkit and cuDF are hard requirements for this example.
rapids_find_package(CUDAToolkit REQUIRED)
rapids_find_package(cudf REQUIRED)

# NOTE(review): CMAKE_POSITION_INDEPENDENT_CODE is already set to TRUE earlier in this file;
# presumably it is repeated in case one of the helper calls above changes it — confirm or remove.
set(CMAKE_POSITION_INDEPENDENT_CODE TRUE)
# Emit compile_commands.json for tooling.
set(CMAKE_EXPORT_COMPILE_COMMANDS ON)

# Declare the python package rooted at this directory, with its sources under dlp_stages/.
morpheus_utils_create_python_package(dlp_stages
PROJECT_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}"
SOURCE_DIRECTORY "${CMAKE_CURRENT_SOURCE_DIR}/dlp_stages"
)

# Process the CMake file in dlp_stages/_lib.
add_subdirectory(dlp_stages/_lib)

# Complete the python package

# Start from an empty list: `extra_args` is only ever appended to below, so without this a value
# inherited from an enclosing directory scope would be forwarded to the build call.
set(extra_args "")

# Forward the in-place flag when the outer build requested an in-place python build.
if(MORPHEUS_PYTHON_INPLACE_BUILD)
  list(APPEND extra_args "IS_INPLACE")
endif()

# When the morpheus-package-install target exists in this build, pass it as a python dependency.
if(TARGET morpheus-package-install)
  list(APPEND extra_args "PYTHON_DEPENDENCIES" "morpheus-package-install")
endif()

# Left unquoted on purpose so each list element is passed as a separate argument
# (and nothing is passed when the list is empty).
morpheus_utils_build_python_package(dlp_stages ${extra_args})

# Restore the message context pushed at the top of this file.
list(POP_BACK CMAKE_MESSAGE_CONTEXT)
Loading