diff --git a/.coveragerc b/.coveragerc index f54ec86..6b7b835 100644 --- a/.coveragerc +++ b/.coveragerc @@ -4,5 +4,6 @@ omit = setup.py docs/* test* + */test* */app.py */wsgi.py diff --git a/.gitignore b/.gitignore index 34ec3f8..fd34d24 100644 --- a/.gitignore +++ b/.gitignore @@ -100,6 +100,8 @@ ENV/ # mypy .mypy_cache/ +.pytest_cache/ + .DS_Store submissions.tsv diff --git a/.travis.yml b/.travis.yml index c0fe89b..ec36a2a 100644 --- a/.travis.yml +++ b/.travis.yml @@ -2,7 +2,7 @@ language: python sudo: required services: - docker -cache: pip +#cache: pip env: MIN_PYLINT_SCORE: 8 os: @@ -11,10 +11,11 @@ python: - "3.6" script: - pip install pipenv - - pipenv install - pipenv install --dev - - pipenv run nose2 --with-coverage - - tests/lint.sh - - tests/docstyle.sh + - pipenv install ./core + - pipenv run pip show arxiv-base + - pipenv run pytest --cov=agent/agent --cov=core/arxiv --cov-report=term-missing agent/agent core/arxiv after_success: - coveralls +# - tests/lint.sh + - tests/docstyle.sh diff --git a/Dockerfile b/Dockerfile new file mode 100644 index 0000000..055a8ee --- /dev/null +++ b/Dockerfile @@ -0,0 +1,24 @@ +# arxiv/submission-agent + +ARG BASE_VERSION=ARXIVNG-2462 + +FROM arxiv/base:${BASE_VERSION} + +WORKDIR /opt/arxiv + +ENV KINESIS_STREAM="SubmissionEvents" \ + KINESIS_SHARD_ID="0" \ + KINESIS_START_TYPE="TRIM_HORIZON" \ + SUBMISSION_AGENT_DATABASE_URI="" \ + LOGLEVEL=10 \ + JWT_SECRET="foo" + +COPY Pipfile Pipfile.lock /opt/arxiv/ +COPY core/ /opt/arxiv/core/ +RUN pipenv install /opt/arxiv/core/ && rm -rf ~/.cache/pip +COPY agent/agent/ /opt/arxiv/agent/ + +ENTRYPOINT ["pipenv", "run"] + +CMD ["python", "-m", "agent.consumer"] +# CMD ["celery", "worker", "-A", "agent.worker.worker_app", "--loglevel=INFO", "-E", "--concurrency=2"] \ No newline at end of file diff --git a/Dockerfile-metadata b/Dockerfile-metadata index 4d61d6f..5f838aa 100644 --- a/Dockerfile-metadata +++ b/Dockerfile-metadata @@ -1,6 +1,6 @@ # arxiv/submission-api -FROM 
arxiv/base:0.7.1 +FROM arxiv/base WORKDIR /opt/arxiv @@ -12,14 +12,14 @@ ENV LANG en_US.utf8 RUN yum -y install mariadb-devel # Add Python consumer and configuration. -ADD Pipfile /opt/arxiv/Pipfile -ADD Pipfile.lock /opt/arxiv/Pipfile.lock +ADD metadata/Pipfile /opt/arxiv/Pipfile RUN pip install pipenv RUN pipenv install +ADD core/ /opt/arxiv/core/ +RUN pipenv install /opt/arxiv/core/ + ADD metadata/ /opt/arxiv/ -ADD authorization/authorization/ /opt/arxiv/authorization/ -ADD core/events/ /opt/arxiv/events/ ADD metadata/bin/entrypoint.sh /opt/arxiv/entrypoint.sh ENV JWT_SECRET "foo" @@ -29,5 +29,6 @@ EXPOSE 8000 CMD ./entrypoint.sh --http-socket :8000 -w wsgi -M \ -t 3000 --manage-script-name \ + --buffer-size 65535 \ --processes 8 --threads 1 --async 100 --ugreen \ --mount /submission=wsgi.py diff --git a/Pipfile b/Pipfile index 41ff9e7..d39b946 100644 --- a/Pipfile +++ b/Pipfile @@ -1,32 +1,50 @@ [[source]] - url = "https://pypi.python.org/simple" verify_ssl = true name = "pypi" - [packages] - -flask = "*" -dataclasses = "*" -pyjwt = "*" -jsonschema = "*" -arxiv-base = "==0.7.1" -sqlalchemy = "*" -pyyaml = "*" -pytz = "*" -uwsgi = "*" -mysqlclient = "*" -mimesis = "*" - +flask = "==1.0.2" +dataclasses = "==0.6" +pyjwt = "==1.6.4" +jsonschema = "==2.6.0" +sqlalchemy = ">=1.3.0" +pytz = "==2018.7" +uwsgi = "==2.0.17.1" +mysqlclient = "==1.3.13" +mimesis = "==2.1.0" +bleach = ">=3.0.2" +python-dateutil = "*" +unidecode = "*" +celery = "==4.1.0" +arxiv-auth = "==0.3.2rc5" +mypy_extensions = "*" +kombu = "==4.1.0" +semver = "*" +flask-sqlalchemy = "*" +retry = "*" +backports-datetime-fromisoformat = "*" +arxiv-base = "==0.15.8rc4" +alembic = "*" +urllib3 = ">=1.24.2" +Jinja2 = ">=2.10.1" +pyyaml = ">=4.2b1" +arxiv-vault = "==0.1.1rc15" +redis = "==2.10.6" [dev-packages] - "nose2" = "*" openapi-spec-validator = "*" -coverage = "*" +coverage = "==4.5" coveralls = "*" -pylint = "*" -pydocstyle = "*" -mypy = "*" +pylint = "<2" +mypy = "==0.660" mimesis = "*" +sphinx = 
"*" +sphinx-autodoc-typehints = "*" +pydocstyle = "==3.0.0" +pytest = "*" +pytest-cov = "*" + +[pipenv] +allow_prereleases = true diff --git a/Pipfile.lock b/Pipfile.lock index bd2dca6..fc49063 100644 --- a/Pipfile.lock +++ b/Pipfile.lock @@ -1,7 +1,7 @@ { "_meta": { "hash": { - "sha256": "6cf1b086839177323652d41e37f76f1000b513bf50808e348b2fbba87fbafa3b" + "sha256": "9dc4d03eef24a15edd0fa7fd26f249b6af8ac23298a3e14dcbaf053afa926f22" }, "pipfile-spec": 6, "requires": {}, @@ -14,19 +14,107 @@ ] }, "default": { + "alembic": { + "hashes": [ + "sha256:828dcaa922155a2b7166c4f36ec45268944e4055c86499bd14319b4c8c0094b7" + ], + "index": "pypi", + "version": "==1.0.10" + }, + "amqp": { + "hashes": [ + "sha256:aa4409446139676943a2eaa27d5f58caf750f4ca5a89f888c452afd86be6a67d", + "sha256:cbb6f87d53cac612a594f982b717cc1c54c6a1e17943a0a0d32dc6cc9e2120c8" + ], + "version": "==2.5.0" + }, + "arxiv-auth": { + "hashes": [ + "sha256:7a328e13eb25ca504cfc7d821f873a6f1fb8671873bada88d99ad235df676198" + ], + "index": "pypi", + "version": "==0.3.2rc5" + }, "arxiv-base": { "hashes": [ - "sha256:e386f771296d2971d0435640a732dc3c77c4f7856ddabfe0daba9e3bafbd6dfb" + "sha256:9a0f7e5f213f7debbe13c74afdee39784e8339e10a776d33af8ecab574ea08ab" + ], + "index": "pypi", + "version": "==0.15.8rc4" + }, + "arxiv-submission-core": { + "path": "./core" + }, + "arxiv-vault": { + "hashes": [ + "sha256:a8f636a1adabea00054d8625766c450da71ec6d79623115019367977148f9f11" ], "index": "pypi", - "version": "==0.7.1" + "version": "==0.1.1rc15" + }, + "backports-datetime-fromisoformat": { + "hashes": [ + "sha256:9577a2a9486cd7383a5f58b23bb8e81cf0821dbbc0eb7c87d3fa198c1df40f5c" + ], + "index": "pypi", + "version": "==1.0.0" + }, + "billiard": { + "hashes": [ + "sha256:42d9a227401ac4fba892918bba0a0c409def5435c4b483267ebfe821afaaba0e" + ], + "version": "==3.5.0.5" + }, + "bleach": { + "hashes": [ + "sha256:213336e49e102af26d9cde77dd2d0397afabc5a6bf2fed985dc35b5d1e285a16", + 
"sha256:3fdf7f77adcf649c9911387df51254b813185e32b2c6619f690b593a617e19fa" + ], + "index": "pypi", + "version": "==3.1.0" + }, + "boto3": { + "hashes": [ + "sha256:8a8219c2d7c3f10bda255b78d99dfabe9a15d6a5a96ca6bcfa51ba5ca204105c", + "sha256:aa1a95f7fc850dd734893946d8e6024ff9bd06515fa5c13ad96396efa6d83ce8" + ], + "version": "==1.9.159" + }, + "botocore": { + "hashes": [ + "sha256:2f90e4d435b45bd708046b8e647c649bb7ff48f26892b86a2869f271a33270f0", + "sha256:35a199173d91791b9a4d69e1a02752fa5276f57b56cd1d26dd3144883c9f21e9" + ], + "version": "==1.12.159" + }, + "celery": { + "hashes": [ + "sha256:77ff3730198d6a17b3c1f05579ebe570b579efb35f6d7e13dba3b1368d068b35", + "sha256:81a67f0d53a688ec2bc8557bd5d6d7218f925a6f2e6df80e01560de9e28997ec" + ], + "index": "pypi", + "version": "==4.1.0" + }, + "certifi": { + "hashes": [ + "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", + "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" + ], + "version": "==2019.3.9" + }, + "chardet": { + "hashes": [ + "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae", + "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691" + ], + "version": "==3.0.4" }, "click": { "hashes": [ - "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", - "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" + "sha256:2335065e6395b9e67ca716de5f7526736bfa6ceead690adf616d925bdc622b13", + "sha256:5b94b49521f6456670fdb30cd82a4eca9412788a93fa6dd6df72c94d5a8ff2d7" ], - "version": "==6.7" + "version": "==7.0" }, "dataclasses": { "hashes": [ @@ -36,6 +124,21 @@ "index": "pypi", "version": "==0.6" }, + "decorator": { + "hashes": [ + "sha256:86156361c50488b84a3f148056ea716ca587df2f0de1d34750d35c21312725de", + "sha256:f069f3a01830ca754ba5258fde2278454a0b5b79e0d7f5c13b3b97e57d4acff6" + ], + "version": "==4.4.0" + }, + "docutils": { + "hashes": [ + 
"sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", + "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", + "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" + ], + "version": "==0.14" + }, "flask": { "hashes": [ "sha256:2271c0070dbcb5275fad4a82e29f23ab92682dc45f9dfbc22c02ba9b9322ce48", @@ -44,18 +147,49 @@ "index": "pypi", "version": "==1.0.2" }, + "flask-sqlalchemy": { + "hashes": [ + "sha256:0c9609b0d72871c540a7945ea559c8fdf5455192d2db67219509aed680a3d45a", + "sha256:8631bbea987bc3eb0f72b1f691d47bd37ceb795e73b59ab48586d76d75a7c605" + ], + "index": "pypi", + "version": "==2.4.0" + }, + "hvac": { + "hashes": [ + "sha256:00f78fb4f8244605284338bb36df6f46fbd4e83807e94a72fbb63a7cbac850e6", + "sha256:727e4e4ecd69631112f8baadc5054024da82248836e77411c6e9e51dffe011c8" + ], + "version": "==0.8.2" + }, + "idna": { + "hashes": [ + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, "itsdangerous": { "hashes": [ - "sha256:cbb3fcf8d3e33df861709ecaf89d9e6629cff0a217bc2848f1b41cd30d360519" + "sha256:321b033d07f2a4136d3ec762eac9f16a10ccd60f53c0c91af90217ace7ba1f19", + "sha256:b12271b2047cb23eeb98c8b5622e2e5c5e9abd9784a153e9d8ef9cb4dd09d749" ], - "version": "==0.24" + "version": "==1.1.0" }, "jinja2": { "hashes": [ - "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", - "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" + "sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", + "sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b" + ], + "index": "pypi", + "version": "==2.10.1" + }, + "jmespath": { + "hashes": [ + "sha256:3720a4b1bd659dd2eecad0666459b9788813e032b83e7ba58578e48254e0a0e6", + "sha256:bde2aef6f44302dfb30320115b17d030798de8c4110e28d5cf6cf91a7a31074c" ], - "version": "==2.10" + "version": 
"==0.9.4" }, "jsonschema": { "hashes": [ @@ -65,30 +199,89 @@ "index": "pypi", "version": "==2.6.0" }, - "markupsafe": { + "kombu": { "hashes": [ - "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665" + "sha256:01f0da9fe222a2183345004243d1518c0fbe5875955f1b24842f2d9c65709ade", + "sha256:4249d9dd9dbf1fcec471d1c2def20653c9310dd1a217272d77e4844f9d5273cb" ], - "version": "==1.0" + "index": "pypi", + "version": "==4.1.0" + }, + "mako": { + "hashes": [ + "sha256:889c7f16d5388092d4c585cf9def19cad089e9f848a7c40e03394553048362a6" + ], + "version": "==1.0.11" + }, + "markupsafe": { + "hashes": [ + "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", + "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", + "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", + "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", + "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", + "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", + "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", + "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", + "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", + "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", + "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", + "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", + "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", + "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", + "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", + "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", 
+ "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", + "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", + "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", + "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", + "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", + "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", + "sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", + "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", + "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", + "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7" + ], + "version": "==1.1.1" }, "mimesis": { "hashes": [ - "sha256:787e843dd8212e07427144732dc662642e59ee1ba1c5be78af066492bb909966", - "sha256:e55a50654cfa1aa718d5983621b5d681409f67b38b542fffbb1282f05ceb2e05" + "sha256:2a17aa98cc8aff2c0b1828312e213a515030ed57a1c7b61fc07a87150cb0f25f", + "sha256:4b856023acdaaefe2e10bbfea9fd4cb6fa9adbbbe9618a8f796aa8887b58e6f2" ], "index": "pypi", - "version": "==2.0.1" + "version": "==2.1.0" + }, + "mypy-extensions": { + "hashes": [ + "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", + "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e" + ], + "index": "pypi", + "version": "==0.4.1" }, "mysqlclient": { "hashes": [ - "sha256:1e85e48b167e2af3bb08f273fdbd1ad6401cbe75057fa6513f97387dc7b282dc", - "sha256:2d9ec33de39f4d9c64ad7322ede0521d85829ce36a76f9dd3d6ab76a9c8648e5", - "sha256:371df79d000af56b4e540b7ce2120d1c9afb04b751bfce25a1eb609c50fd10ff", - "sha256:b3b1a7e4468180afb79289b54069d9499242946a4cedf3928cbf6b2a13800016", - "sha256:d56e379c03efad746e84705cbb97401f60d1f98b05e11a27f2d9c2d043936974" + "sha256:ff8ee1be84215e6c30a746b728c41eb0701a46ca76e343af445b35ce6250644f" ], "index": "pypi", - "version": "==1.3.12" + "version": "==1.3.13" 
+ }, + "py": { + "hashes": [ + "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", + "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + ], + "version": "==1.8.0" + }, + "pycountry": { + "hashes": [ + "sha256:104a8ca94c700898c42a0172da2eab5a5675c49637b729a11db9e1dac2d983cd", + "sha256:8ec4020b2b15cd410893d573820d42ee12fe50365332e58c0975c953b60a16de" + ], + "version": "==18.12.8" }, "pyjwt": { "hashes": [ @@ -98,70 +291,200 @@ "index": "pypi", "version": "==1.6.4" }, + "python-dateutil": { + "hashes": [ + "sha256:7e6584c74aeed623791615e26efd690f29817a27c73085b78e4bad02493df2fb", + "sha256:c89805f6f4d64db21ed966fda138f8a5ed7a4fdbc1a8ee329ce1b74e3c74da9e" + ], + "index": "pypi", + "version": "==2.8.0" + }, + "python-editor": { + "hashes": [ + "sha256:1bf6e860a8ad52a14c3ee1252d5dc25b2030618ed80c022598f00176adc8367d", + "sha256:51fda6bcc5ddbbb7063b2af7509e43bd84bfc32a4ff71349ec7847713882327b", + "sha256:5f98b069316ea1c2ed3f67e7f5df6c0d8f10b689964a4a811ff64f0106819ec8" + ], + "version": "==1.0.4" + }, "pytz": { "hashes": [ - "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555", - "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749" + "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", + "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" ], "index": "pypi", - "version": "==2018.4" + "version": "==2018.7" }, "pyyaml": { "hashes": [ - "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8", - "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736", - "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f", - "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608", - "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8", - "sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab", - 
"sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7", - "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3", - "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1", - "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6", - "sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8", - "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4", - "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca", - "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269" + "sha256:1adecc22f88d38052fb787d959f003811ca858b799590a5eaa70e63dca50308c", + "sha256:436bc774ecf7c103814098159fbb84c2715d25980175292c648f2da143909f95", + "sha256:460a5a4248763f6f37ea225d19d5c205677d8d525f6a83357ca622ed541830c2", + "sha256:5a22a9c84653debfbf198d02fe592c176ea548cccce47553f35f466e15cf2fd4", + "sha256:7a5d3f26b89d688db27822343dfa25c599627bc92093e788956372285c6298ad", + "sha256:9372b04a02080752d9e6f990179a4ab840227c6e2ce15b95e1278456664cf2ba", + "sha256:a5dcbebee834eaddf3fa7366316b880ff4062e4bcc9787b78c7fbb4a26ff2dd1", + "sha256:aee5bab92a176e7cd034e57f46e9df9a9862a71f8f37cad167c6fc74c65f5b4e", + "sha256:c51f642898c0bacd335fc119da60baae0824f2cde95b0330b56c0553439f0673", + "sha256:c68ea4d3ba1705da1e0d85da6684ac657912679a649e8868bd850d2c299cce13", + "sha256:e23d0cc5299223dcc37885dae624f382297717e459ea24053709675a976a3e19" + ], + "index": "pypi", + "version": "==5.1" + }, + "redis": { + "hashes": [ + "sha256:8a1900a9f2a0a44ecf6e8b5eb3e967a9909dfed219ad66df094f27f7d6f330fb", + "sha256:a22ca993cea2962dbb588f9f30d0015ac4afcc45bee27d3978c0dbe9e97c6c0f" + ], + "index": "pypi", + "version": "==2.10.6" + }, + "redis-py-cluster": { + "hashes": [ + "sha256:7db54b1de60bd34da3806676b112f07fc9afae556d8260ac02c3335d574ee42c" + ], + "version": "==1.3.6" + }, + "requests": { + "hashes": [ + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + 
"sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" + ], + "version": "==2.22.0" + }, + "retry": { + "hashes": [ + "sha256:ccddf89761fa2c726ab29391837d4327f819ea14d244c232a1d24c67a2f98606", + "sha256:f8bfa8b99b69c4506d6f5bd3b0aabf77f98cdb17f3c9fc3f5ca820033336fba4" ], "index": "pypi", - "version": "==3.12" + "version": "==0.9.2" + }, + "s3transfer": { + "hashes": [ + "sha256:7b9ad3213bff7d357f888e0fab5101b56fa1a0548ee77d121c3a3dbfbef4cb2e", + "sha256:f23d5cb7d862b104401d9021fc82e5fa0e0cf57b7660a1331425aab0c691d021" + ], + "version": "==0.2.0" + }, + "semver": { + "hashes": [ + "sha256:41c9aa26c67dc16c54be13074c352ab666bce1fa219c7110e8f03374cd4206b0", + "sha256:5b09010a66d9a3837211bb7ae5a20d10ba88f8cb49e92cb139a69ef90d5060d8" + ], + "index": "pypi", + "version": "==2.8.1" + }, + "six": { + "hashes": [ + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" + ], + "version": "==1.12.0" }, "sqlalchemy": { "hashes": [ - "sha256:2d5f08f714a886a1382c18be501e614bce50d362384dc089474019ce0768151c" + "sha256:c7fef198b43ef31dfd783d094fd5ee435ce8717592e6784c45ba337254998017" + ], + "index": "pypi", + "version": "==1.3.4" + }, + "typing-extensions": { + "hashes": [ + "sha256:07b2c978670896022a43c4b915df8958bec4a6b84add7f2c87b2b728bda3ba64", + "sha256:f3f0e67e1d42de47b5c67c32c9b26641642e9170fe7e292991793705cd5fef7c", + "sha256:fb2cd053238d33a8ec939190f30cfd736c00653a85a2919415cecf7dc3d9da71" + ], + "version": "==3.7.2" + }, + "unidecode": { + "hashes": [ + "sha256:092cdf7ad9d1052c50313426a625b717dab52f7ac58f859e09ea020953b1ad8f", + "sha256:8b85354be8fd0c0e10adbf0675f6dc2310e56fda43fa8fe049123b6c475e52fb" + ], + "index": "pypi", + "version": "==1.0.23" + }, + "urllib3": { + "hashes": [ + "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", + "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" ], "index": "pypi", - 
"version": "==1.2.8" + "version": "==1.25.3" }, "uwsgi": { "hashes": [ - "sha256:3dc2e9b48db92b67bfec1badec0d3fdcc0771316486c5efa3217569da3528bf2" + "sha256:d2318235c74665a60021a4fc7770e9c2756f9fc07de7b8c22805efe85b5ab277" ], "index": "pypi", - "version": "==2.0.17" + "version": "==2.0.17.1" + }, + "vine": { + "hashes": [ + "sha256:133ee6d7a9016f177ddeaf191c1f58421a1dcc6ee9a42c58b34bed40e1d2cd87", + "sha256:ea4947cc56d1fd6f2095c8d543ee25dad966f78692528e68b4fada11ba3f98af" + ], + "version": "==1.3.0" + }, + "webencodings": { + "hashes": [ + "sha256:a0af1213f3c2226497a97e2b3aa01a7e4bee4f403f95be16fc9acd2947514a78", + "sha256:b36a1c245f2d304965eb4e0a82848379241dc04b865afcc4aab16748587e1923" + ], + "version": "==0.5.1" }, "werkzeug": { "hashes": [ - "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c", - "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b" + "sha256:865856ebb55c4dcd0630cdd8f3331a1847a819dda7e8c750d3db6f2aa6c0209c", + "sha256:a0b915f0815982fb2a09161cb8f31708052d0951c3ba433ccc5e1aa276507ca6" ], - "version": "==0.14.1" + "version": "==0.15.4" } }, "develop": { + "alabaster": { + "hashes": [ + "sha256:446438bdcca0e05bd45ea2de1668c1d9b032e1a9154c2c259092d77031ddd359", + "sha256:a661d72d58e6ea8a57f7a86e37d86716863ee5e92788398526d58b26a4e4dc02" + ], + "version": "==0.7.12" + }, "astroid": { "hashes": [ - "sha256:032f6e09161e96f417ea7fad46d3fac7a9019c775f202182c22df0e4f714cb1c", - "sha256:dea42ae6e0b789b543f728ddae7ddb6740ba33a49fb52c4a4d9cb7bb4aa6ec09" + "sha256:87de48a92e29cedf7210ffa853d11441e7ad94cb47bacd91b023499b51cbc756", + "sha256:d25869fc7f44f1d9fb7d24fd7ea0639656f5355fc3089cd1f3d18c6ec6b124c7" ], - "version": "==1.6.4" + "version": "==1.6.6" + }, + "atomicwrites": { + "hashes": [ + "sha256:03472c30eb2c5d1ba9227e4c2ca66ab8287fbfbbda3888aa93dc2e28fc6811b4", + "sha256:75a9445bac02d8d058d5e1fe689654ba5a6556a1dfd8ce6ec55a0ed79866cfa6" + ], + "version": "==1.3.0" + }, + "attrs": { + "hashes": [ + 
"sha256:69c0dbf2ed392de1cb5ec704444b08a5ef81680a61cb899dc08127123af36a79", + "sha256:f0b870f674851ecbfbbbd364d6b5cbdff9dcedbc7f3f5e18a6891057f21fe399" + ], + "version": "==19.1.0" + }, + "babel": { + "hashes": [ + "sha256:af92e6106cb7c55286b25b38ad7695f8b4efb36a90ba483d7f7a6628c46158ab", + "sha256:e86135ae101e31e2c8ec20a4e0c5220f4eed12487d5cf3f78be7e98d3a57fc28" + ], + "version": "==2.7.0" }, "certifi": { "hashes": [ - "sha256:13e698f54293db9f89122b0581843a782ad0934a4fe0172d2a980ba77fc61bb7", - "sha256:9fa520c1bacfb634fa7af20a76bcbd3d5fb390481724c597da32c719a7dca4b0" + "sha256:59b7658e26ca9c7339e00f8f4636cdfe59d34fa37b9b04f6f9e9926b3cece1a5", + "sha256:b26104d6835d1f5e49452a26eb2ff87fe7090b89dfcaee5ea2212697e1e1d7ae" ], - "version": "==2018.4.16" + "version": "==2019.3.9" }, "chardet": { "hashes": [ @@ -172,53 +495,45 @@ }, "coverage": { "hashes": [ - "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba", - "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed", - "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a", - "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd", - "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640", - "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2", - "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162", - "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508", - "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249", - "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694", - "sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a", - "sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287", - "sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1", - "sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000", - 
"sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1", - "sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e", - "sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5", - "sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062", - "sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba", - "sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc", - "sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc", - "sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99", - "sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653", - "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c", - "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558", - "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f", - "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4", - "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91", - "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d", - "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9", - "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd", - "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d", - "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", - "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77", - "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80", - "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e" - ], - "index": "pypi", - "version": "==4.5.1" + "sha256:0f2315c793b1360f80a9119fff76efb7b4e5ab5062651dff515e681719f29689", + "sha256:21e47d2ff9c75e25880dd12b316db11379e9afc98b39e9516149d189c15c564b", + "sha256:2890cb40464686c0c1dccc1223664bbc34d85af053bc5dbcd71ea13959e264f2", + 
"sha256:464d85d6959497cc4adfa9f0d36fca809e2ca7ec5f4625f548317892cac6ed7c", + "sha256:464e0eda175c7fe2dc730d9d02acde5b8a8518d9417413dee6ca187d1f65ef89", + "sha256:47ad00a0c025f87a7528cc13d013c54e4691ae8730430e49ec9c7ace7e0e1fba", + "sha256:53fa7aa7643a22eeadcf8b781b97a51f37d43ba1d897a05238aa7e4d11bc0667", + "sha256:54d73fe68a7ac9c847af69a234a7461bbaf3cad95f258317d4584d14dd53f679", + "sha256:61e0bcf15aa0385e15d1fe4a86022a6b813d08c785855e3fab56ba6d7ac3dd21", + "sha256:67288f8834a0a64c1af66286b22fd325b5524ceaa153a51c3e2e30f7e8b3f826", + "sha256:7413f078fbba267de44814584593a729f88fc37f2d938263844b7f4daf1e36ec", + "sha256:85c028e959312285225040cdac5ad3db6189e958a234f09ae6b4ba5f539c842d", + "sha256:8ddcf308f894d01a1a0ae01283d19b613751815b7190113266a0b7f9d076e86d", + "sha256:95ce1a70323d47c0f6b8d6cfd3c14c38cb30d51fd1ab4f6414734fa33a78b17e", + "sha256:95f9f5072afeb2204401401cbd0ab978a9f86ef1ebc5cd267ba431cfa581cc4d", + "sha256:981a64063242a2c6c88dda33ccafe3583026847961fe56636b6a00c47674e258", + "sha256:a0d98c71d026c1757c7393a99d24c6e42091ff41e20e68238b17e145252c2d0a", + "sha256:adab01e4c63a01bdf036f57f0114497994aa2195d8659d12a3d69673c3f27939", + "sha256:b718efb33097c7651a60a03b4b38b14776f92194bc0e9e598ce05ddaef7c70e7", + "sha256:b7a06a523dfeaf417da630d46ad4f4e11ca1bae6202c9312c4cb987dde5792fc", + "sha256:c68164c4f49cfc2e66ca1ded62e4a1092a6bd4b2c65222059b867700ad19151c", + "sha256:c86a12b3dc004bcbe97a3849354bd1f93eb6fb69b0e4eb58831fd7adba7740ec", + "sha256:ca8827a5dad1176a8da6bf5396fd07e66549d1bc842047b76cdf69e196597a80", + "sha256:cfb6b7035c6605e2a87abe7d84ea35a107e6c432014a3f1ca243ab57a558fbcd", + "sha256:da6585339fc8a25086003a2b2c0167438b8ab0cd0ccae468d22ed603e414bba1", + "sha256:e837865a7b20c01a8a2f904c05fba36e8406b146649ff9174cbddf32e217b777", + "sha256:e958ab5b6a7f3b88289a25c95d031f2b62bc73219141c09d261fd97f244c124c", + "sha256:f6b822c68f68f48d480d23fcfcd1d4df7d42ff03cf5d7b574d09e662c0b95b43" + ], + "index": "pypi", + "version": "==4.5" }, "coveralls": { "hashes": [ - 
"sha256:32569a43c9dbc13fa8199247580a4ab182ef439f51f65bb7f8316d377a1340e8", - "sha256:664794748d2e5673e347ec476159a9d87f43e0d2d44950e98ed0e27b98da8346" + "sha256:a8de28a5f04e418c7142b8ce6588c3a64245b433c458a5871cb043383667e4f2", + "sha256:c5e50b73b980d89308816b597e3e7bdeb0adedf831585d5c4ac967d576f8925d" ], "index": "pypi", - "version": "==1.3.0" + "version": "==1.8.0" }, "docopt": { "hashes": [ @@ -226,20 +541,49 @@ ], "version": "==0.6.2" }, + "docutils": { + "hashes": [ + "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", + "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274", + "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6" + ], + "version": "==0.14" + }, "idna": { "hashes": [ - "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f", - "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4" + "sha256:c357b3f628cf53ae2c4c05627ecc484553142ca23264e593d327bcde5e9c3407", + "sha256:ea8b7f6188e6fa117537c3df7da9fc686d485087abf6ac197f9c46432f7e4a3c" + ], + "version": "==2.8" + }, + "imagesize": { + "hashes": [ + "sha256:3f349de3eb99145973fefb7dbe38554414e5c30abd0c8e4b970a7c9d09f3a1d8", + "sha256:f3832918bc3c66617f92e35f5d70729187676313caa60c187eb0f28b8fe5e3b5" + ], + "version": "==1.1.0" + }, + "importlib-metadata": { + "hashes": [ + "sha256:a9f185022cfa69e9ca5f7eabfd5a58b689894cb78a11e3c8c89398a8ccbb8e7f", + "sha256:df1403cd3aebeb2b1dcd3515ca062eecb5bd3ea7611f18cba81130c68707e879" ], - "version": "==2.6" + "version": "==0.17" }, "isort": { "hashes": [ - "sha256:1153601da39a25b14ddc54955dbbacbb6b2d19135386699e2ad58517953b34af", - "sha256:b9c40e9750f3d77e6e4d441d8b0266cf555e7cdabdcff33c4fd06366ca761ef8", - "sha256:ec9ef8f4a9bc6f71eec99e1806bfa2de401650d996c59330782b89a5555c1497" + "sha256:c40744b6bc5162bbb39c1257fe298b7a393861d50978b565f3ccd9cb9de0182a", + "sha256:f57abacd059dc3bd666258d1efb0377510a89777fda3e3274e3c01f7c03ae22d" + ], + "version": "==4.3.20" + }, + 
"jinja2": { + "hashes": [ + "sha256:065c4f02ebe7f7cf559e49ee5a95fb800a9e4528727aec6f24402a5374c65013", + "sha256:14dd6caf1527abb21f08f86c784eac40853ba93edb79552aa1e4b8aef1b61c7b" ], - "version": "==4.3.4" + "index": "pypi", + "version": "==2.10.1" }, "jsonschema": { "hashes": [ @@ -251,37 +595,59 @@ }, "lazy-object-proxy": { "hashes": [ - "sha256:0ce34342b419bd8f018e6666bfef729aec3edf62345a53b537a4dcc115746a33", - "sha256:1b668120716eb7ee21d8a38815e5eb3bb8211117d9a90b0f8e21722c0758cc39", - "sha256:209615b0fe4624d79e50220ce3310ca1a9445fd8e6d3572a896e7f9146bbf019", - "sha256:27bf62cb2b1a2068d443ff7097ee33393f8483b570b475db8ebf7e1cba64f088", - "sha256:27ea6fd1c02dcc78172a82fc37fcc0992a94e4cecf53cb6d73f11749825bd98b", - "sha256:2c1b21b44ac9beb0fc848d3993924147ba45c4ebc24be19825e57aabbe74a99e", - "sha256:2df72ab12046a3496a92476020a1a0abf78b2a7db9ff4dc2036b8dd980203ae6", - "sha256:320ffd3de9699d3892048baee45ebfbbf9388a7d65d832d7e580243ade426d2b", - "sha256:50e3b9a464d5d08cc5227413db0d1c4707b6172e4d4d915c1c70e4de0bbff1f5", - "sha256:5276db7ff62bb7b52f77f1f51ed58850e315154249aceb42e7f4c611f0f847ff", - "sha256:61a6cf00dcb1a7f0c773ed4acc509cb636af2d6337a08f362413c76b2b47a8dd", - "sha256:6ae6c4cb59f199d8827c5a07546b2ab7e85d262acaccaacd49b62f53f7c456f7", - "sha256:7661d401d60d8bf15bb5da39e4dd72f5d764c5aff5a86ef52a042506e3e970ff", - "sha256:7bd527f36a605c914efca5d3d014170b2cb184723e423d26b1fb2fd9108e264d", - "sha256:7cb54db3535c8686ea12e9535eb087d32421184eacc6939ef15ef50f83a5e7e2", - "sha256:7f3a2d740291f7f2c111d86a1c4851b70fb000a6c8883a59660d95ad57b9df35", - "sha256:81304b7d8e9c824d058087dcb89144842c8e0dea6d281c031f59f0acf66963d4", - "sha256:933947e8b4fbe617a51528b09851685138b49d511af0b6c0da2539115d6d4514", - "sha256:94223d7f060301b3a8c09c9b3bc3294b56b2188e7d8179c762a1cda72c979252", - "sha256:ab3ca49afcb47058393b0122428358d2fbe0408cf99f1b58b295cfeb4ed39109", - "sha256:bd6292f565ca46dee4e737ebcc20742e3b5be2b01556dafe169f6c65d088875f", - 
"sha256:cb924aa3e4a3fb644d0c463cad5bc2572649a6a3f68a7f8e4fbe44aaa6d77e4c", - "sha256:d0fc7a286feac9077ec52a927fc9fe8fe2fabab95426722be4c953c9a8bede92", - "sha256:ddc34786490a6e4ec0a855d401034cbd1242ef186c20d79d2166d6a4bd449577", - "sha256:e34b155e36fa9da7e1b7c738ed7767fc9491a62ec6af70fe9da4a057759edc2d", - "sha256:e5b9e8f6bda48460b7b143c3821b21b452cb3a835e6bbd5dd33aa0c8d3f5137d", - "sha256:e81ebf6c5ee9684be8f2c87563880f93eedd56dd2b6146d8a725b50b7e5adb0f", - "sha256:eb91be369f945f10d3a49f5f9be8b3d0b93a4c2be8f8a5b83b0571b8123e0a7a", - "sha256:f460d1ceb0e4a5dcb2a652db0904224f367c9b3c1470d5a7683c0480e582468b" - ], - "version": "==1.3.1" + "sha256:159a745e61422217881c4de71f9eafd9d703b93af95618635849fe469a283661", + "sha256:23f63c0821cc96a23332e45dfaa83266feff8adc72b9bcaef86c202af765244f", + "sha256:3b11be575475db2e8a6e11215f5aa95b9ec14de658628776e10d96fa0b4dac13", + "sha256:3f447aff8bc61ca8b42b73304f6a44fa0d915487de144652816f950a3f1ab821", + "sha256:4ba73f6089cd9b9478bc0a4fa807b47dbdb8fad1d8f31a0f0a5dbf26a4527a71", + "sha256:4f53eadd9932055eac465bd3ca1bd610e4d7141e1278012bd1f28646aebc1d0e", + "sha256:64483bd7154580158ea90de5b8e5e6fc29a16a9b4db24f10193f0c1ae3f9d1ea", + "sha256:6f72d42b0d04bfee2397aa1862262654b56922c20a9bb66bb76b6f0e5e4f9229", + "sha256:7c7f1ec07b227bdc561299fa2328e85000f90179a2f44ea30579d38e037cb3d4", + "sha256:7c8b1ba1e15c10b13cad4171cfa77f5bb5ec2580abc5a353907780805ebe158e", + "sha256:8559b94b823f85342e10d3d9ca4ba5478168e1ac5658a8a2f18c991ba9c52c20", + "sha256:a262c7dfb046f00e12a2bdd1bafaed2408114a89ac414b0af8755c696eb3fc16", + "sha256:acce4e3267610c4fdb6632b3886fe3f2f7dd641158a843cf6b6a68e4ce81477b", + "sha256:be089bb6b83fac7f29d357b2dc4cf2b8eb8d98fe9d9ff89f9ea6012970a853c7", + "sha256:bfab710d859c779f273cc48fb86af38d6e9210f38287df0069a63e40b45a2f5c", + "sha256:c10d29019927301d524a22ced72706380de7cfc50f767217485a912b4c8bd82a", + "sha256:dd6e2b598849b3d7aee2295ac765a578879830fb8966f70be8cd472e6069932e", + 
"sha256:e408f1eacc0a68fed0c08da45f31d0ebb38079f043328dce69ff133b95c29dc1" + ], + "version": "==1.4.1" + }, + "markupsafe": { + "hashes": [ + "sha256:00bc623926325b26bb9605ae9eae8a215691f33cae5df11ca5424f06f2d1f473", + "sha256:09027a7803a62ca78792ad89403b1b7a73a01c8cb65909cd876f7fcebd79b161", + "sha256:09c4b7f37d6c648cb13f9230d847adf22f8171b1ccc4d5682398e77f40309235", + "sha256:1027c282dad077d0bae18be6794e6b6b8c91d58ed8a8d89a89d59693b9131db5", + "sha256:24982cc2533820871eba85ba648cd53d8623687ff11cbb805be4ff7b4c971aff", + "sha256:29872e92839765e546828bb7754a68c418d927cd064fd4708fab9fe9c8bb116b", + "sha256:43a55c2930bbc139570ac2452adf3d70cdbb3cfe5912c71cdce1c2c6bbd9c5d1", + "sha256:46c99d2de99945ec5cb54f23c8cd5689f6d7177305ebff350a58ce5f8de1669e", + "sha256:500d4957e52ddc3351cabf489e79c91c17f6e0899158447047588650b5e69183", + "sha256:535f6fc4d397c1563d08b88e485c3496cf5784e927af890fb3c3aac7f933ec66", + "sha256:62fe6c95e3ec8a7fad637b7f3d372c15ec1caa01ab47926cfdf7a75b40e0eac1", + "sha256:6dd73240d2af64df90aa7c4e7481e23825ea70af4b4922f8ede5b9e35f78a3b1", + "sha256:717ba8fe3ae9cc0006d7c451f0bb265ee07739daf76355d06366154ee68d221e", + "sha256:79855e1c5b8da654cf486b830bd42c06e8780cea587384cf6545b7d9ac013a0b", + "sha256:7c1699dfe0cf8ff607dbdcc1e9b9af1755371f92a68f706051cc8c37d447c905", + "sha256:88e5fcfb52ee7b911e8bb6d6aa2fd21fbecc674eadd44118a9cc3863f938e735", + "sha256:8defac2f2ccd6805ebf65f5eeb132adcf2ab57aa11fdf4c0dd5169a004710e7d", + "sha256:98c7086708b163d425c67c7a91bad6e466bb99d797aa64f965e9d25c12111a5e", + "sha256:9add70b36c5666a2ed02b43b335fe19002ee5235efd4b8a89bfcf9005bebac0d", + "sha256:9bf40443012702a1d2070043cb6291650a0841ece432556f784f004937f0f32c", + "sha256:ade5e387d2ad0d7ebf59146cc00c8044acbd863725f887353a10df825fc8ae21", + "sha256:b00c1de48212e4cc9603895652c5c410df699856a2853135b3967591e4beebc2", + "sha256:b1282f8c00509d99fef04d8ba936b156d419be841854fe901d8ae224c59f0be5", + "sha256:b2051432115498d3562c084a49bba65d97cf251f5a331c64a12ee7e04dacc51b", + 
"sha256:ba59edeaa2fc6114428f1637ffff42da1e311e29382d81b339c1817d37ec93c6", + "sha256:c8716a48d94b06bb3b2524c2b77e055fb313aeb4ea620c8dd03a105574ba704f", + "sha256:cd5df75523866410809ca100dc9681e301e3c27567cf498077e8551b6d20e42f", + "sha256:e249096428b3ae81b08327a63a485ad0878de3fb939049038579ac0ef61e17e7" + ], + "version": "==1.1.1" }, "mccabe": { "hashes": [ @@ -292,92 +658,165 @@ }, "mimesis": { "hashes": [ - "sha256:787e843dd8212e07427144732dc662642e59ee1ba1c5be78af066492bb909966", - "sha256:e55a50654cfa1aa718d5983621b5d681409f67b38b542fffbb1282f05ceb2e05" + "sha256:2a17aa98cc8aff2c0b1828312e213a515030ed57a1c7b61fc07a87150cb0f25f", + "sha256:4b856023acdaaefe2e10bbfea9fd4cb6fa9adbbbe9618a8f796aa8887b58e6f2" ], "index": "pypi", - "version": "==2.0.1" + "version": "==2.1.0" + }, + "more-itertools": { + "hashes": [ + "sha256:2112d2ca570bb7c3e53ea1a35cd5df42bb0fd10c45f0fb97178679c3c03d64c7", + "sha256:c3e4748ba1aad8dba30a4886b0b1a2004f9a863837b8654e7059eebf727afa5a" + ], + "markers": "python_version > '2.7'", + "version": "==7.0.0" }, "mypy": { "hashes": [ - "sha256:01cf289838f266ae7c6550c813181ee77d21eac9459dbf067e7a95a0a2db9721", - "sha256:bc251cb31bc236d9fe4bcc442c994c45fff2541f7161ee52dc949741fe9ca3dd" + "sha256:986a7f97808a865405c5fd98fae5ebfa963c31520a56c783df159e9a81e41b3e", + "sha256:cc5df73cc11d35655a8c364f45d07b13c8db82c000def4bd7721be13356533b4" ], "index": "pypi", - "version": "==0.600" + "version": "==0.660" + }, + "mypy-extensions": { + "hashes": [ + "sha256:37e0e956f41369209a3d5f34580150bcacfabaa57b33a15c0b25f4b5725e0812", + "sha256:b16cabe759f55e3409a7d231ebd2841378fb0c27a5d1994719e340e4f429ac3e" + ], + "index": "pypi", + "version": "==0.4.1" }, "nose2": { "hashes": [ - "sha256:954a62cfb2d2ac06dad32995cbc822bf00cc11e20d543963515932fd4eff33fa" + "sha256:0ede156fd7974fa40893edeca0b709f402c0ccacd7b81b22e76f73c116d1b999", + "sha256:31d8beb00aed3ccc6efb1742bb90227d883e471715188249f594310676e0ef0e" ], "index": "pypi", - "version": "==0.7.4" + "version": 
"==0.9.1" }, "openapi-spec-validator": { "hashes": [ - "sha256:76fcd37c20d14f1a5084f41fb2e4d0d900f6474f5fda40c27ddbeb791eaad16e", - "sha256:f97ba93798cc958e04250ffe6418c7a25a13363c6cb0f0269d9e7db1a0e467c1" + "sha256:5d0f22167810c32e771fa7e4aab6ef26d09233b70817f4d84f9c13bd9a522a37", + "sha256:77c4fb47fe8a7dd527c7433861638221eb416827dc1c5c983505c0a38ca6e9eb", + "sha256:873aad19e68c8eeceb9922840f39e671e8ce62b2587f18b4f66f306d9eed8bd9" ], "index": "pypi", - "version": "==0.2.0" + "version": "==0.2.7" + }, + "packaging": { + "hashes": [ + "sha256:0c98a5d0be38ed775798ece1b9727178c4469d9c3b4ada66e8e6b7849f8732af", + "sha256:9e1cbf8c12b1f1ce0bb5344b8d7ecf66a6f8a6e91bcb0c84593ed6d3ab5c4ab3" + ], + "version": "==19.0" }, - "pyaml": { + "pluggy": { "hashes": [ - "sha256:66623c52f34d83a2c0fc963e08e8b9d0c13d88404e3b43b1852ef71eda19afa3", - "sha256:f83fc302c52c6b83a15345792693ae0b5bc07ad19f59e318b7617d7123d62990" + "sha256:0825a152ac059776623854c1543d65a4ad408eb3d33ee114dff91e57ec6ae6fc", + "sha256:b9817417e95936bf75d85d3f8767f7df6cdde751fc40aed3bb3074cbcb77757c" ], - "version": "==17.12.1" + "version": "==0.12.0" + }, + "py": { + "hashes": [ + "sha256:64f65755aee5b381cea27766a3a147c3f15b9b6b9ac88676de66ba2ae36793fa", + "sha256:dc639b046a6e2cff5bbe40194ad65936d6ba360b52b3c3fe1d08a82dd50b5e53" + ], + "version": "==1.8.0" }, "pydocstyle": { "hashes": [ - "sha256:08a870edc94508264ed90510db466c6357c7192e0e866561d740624a8fc7d90c", - "sha256:4d5bcde961107873bae621f3d580c3e35a426d3687ffc6f8fb356f6628da5a97", - "sha256:af9fcccb303899b83bec82dc9a1d56c60fc369973223a5e80c3dfa9bdf984405" + "sha256:2258f9b0df68b97bf3a6c29003edc5238ff8879f1efb6f1999988d934e432bd8", + "sha256:5741c85e408f9e0ddf873611085e819b809fca90b619f5fd7f34bd4959da3dd4", + "sha256:ed79d4ec5e92655eccc21eb0c6cf512e69512b4a97d215ace46d17e4990f2039" ], "index": "pypi", - "version": "==2.1.1" + "version": "==3.0.0" + }, + "pygments": { + "hashes": [ + "sha256:71e430bc85c88a430f000ac1d9b331d2407f681d6f6aec95e8bcfbc3df5b0127", + 
"sha256:881c4c157e45f30af185c1ffe8d549d48ac9127433f2c380c24b84572ad66297" + ], + "version": "==2.4.2" }, "pylint": { "hashes": [ - "sha256:aa519865f8890a5905fa34924fed0f3bfc7d84fc9f9142c16dac52ffecd25a39", - "sha256:c353d8225195b37cc3aef18248b8f3fe94c5a6a95affaf885ae21a24ca31d8eb" + "sha256:02c2b6d268695a8b64ad61847f92e611e6afcff33fd26c3a2125370c4662905d", + "sha256:ee1e85575587c5b58ddafa25e1c1b01691ef172e139fc25585e5d3f02451da93" + ], + "index": "pypi", + "version": "==1.9.4" + }, + "pyparsing": { + "hashes": [ + "sha256:1873c03321fc118f4e9746baf201ff990ceb915f433f23b395f5580d1840cb2a", + "sha256:9b6323ef4ab914af344ba97510e966d64ba91055d6b9afa6b30799340e89cc03" + ], + "version": "==2.4.0" + }, + "pyrsistent": { + "hashes": [ + "sha256:16692ee739d42cf5e39cef8d27649a8c1fdb7aa99887098f1460057c5eb75c3a" + ], + "version": "==0.15.2" + }, + "pytest": { + "hashes": [ + "sha256:52fa94b4ac81d2f063ee05e303acedf5c605e15dc0f4eef468b5c137f77241c3", + "sha256:5467f37a0d6bb0b4e684b71af268e005996b9eaaefe54e3d64d86afd90da8d78" ], "index": "pypi", - "version": "==1.9.1" + "version": "==4.6.0" + }, + "pytest-cov": { + "hashes": [ + "sha256:2b097cde81a302e1047331b48cadacf23577e431b61e9c6f49a1170bbe3d3da6", + "sha256:e00ea4fdde970725482f1f35630d12f074e121a23801aabf2ae154ec6bdd343a" + ], + "index": "pypi", + "version": "==2.7.1" + }, + "pytz": { + "hashes": [ + "sha256:31cb35c89bd7d333cd32c5f278fca91b523b0834369e757f4c5641ea252236ca", + "sha256:8e0f8568c118d3077b46be7d654cc8167fa916092e28320cde048e54bfc9f1e6" + ], + "index": "pypi", + "version": "==2018.7" }, "pyyaml": { "hashes": [ - "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8", - "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736", - "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f", - "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608", - "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8", - 
"sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab", - "sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7", - "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3", - "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1", - "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6", - "sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8", - "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4", - "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca", - "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269" + "sha256:1adecc22f88d38052fb787d959f003811ca858b799590a5eaa70e63dca50308c", + "sha256:436bc774ecf7c103814098159fbb84c2715d25980175292c648f2da143909f95", + "sha256:460a5a4248763f6f37ea225d19d5c205677d8d525f6a83357ca622ed541830c2", + "sha256:5a22a9c84653debfbf198d02fe592c176ea548cccce47553f35f466e15cf2fd4", + "sha256:7a5d3f26b89d688db27822343dfa25c599627bc92093e788956372285c6298ad", + "sha256:9372b04a02080752d9e6f990179a4ab840227c6e2ce15b95e1278456664cf2ba", + "sha256:a5dcbebee834eaddf3fa7366316b880ff4062e4bcc9787b78c7fbb4a26ff2dd1", + "sha256:aee5bab92a176e7cd034e57f46e9df9a9862a71f8f37cad167c6fc74c65f5b4e", + "sha256:c51f642898c0bacd335fc119da60baae0824f2cde95b0330b56c0553439f0673", + "sha256:c68ea4d3ba1705da1e0d85da6684ac657912679a649e8868bd850d2c299cce13", + "sha256:e23d0cc5299223dcc37885dae624f382297717e459ea24053709675a976a3e19" ], "index": "pypi", - "version": "==3.12" + "version": "==5.1" }, "requests": { "hashes": [ - "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", - "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" + "sha256:11e007a8a2aa0323f5a921e9e6a2d7e4e67d9877e85773fba9ba6419025cbeb4", + "sha256:9cf5292fcd0f598c671cfc1e0d7d1a7f13bb8085e9a590f48c010551dc6c4b31" ], - "version": "==2.18.4" + "version": "==2.22.0" }, "six": { 
"hashes": [ - "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9", - "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb" + "sha256:3350809f0555b11f552448330d0b52d5f24c91a322ea4a15ef22629740f3761c", + "sha256:d16a0141ec1a18405cd4ce8b4613101da75da0e9a7aec5bdd4fa804d0e0eba73" ], - "version": "==1.11.0" + "version": "==1.12.0" }, "snowballstemmer": { "hashes": [ @@ -386,41 +825,117 @@ ], "version": "==1.2.1" }, - "typed-ast": { + "sphinx": { "hashes": [ - "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", - "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", - "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", - "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", - "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", - "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", - "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa", - "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", - "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", - "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", - "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", - "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", - "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", - "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", - "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", - "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", - "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", - "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6" + "sha256:423280646fb37944dd3c85c58fb92a20d745793a9f6c511f59da82fa97cd404b", + 
"sha256:de930f42600a4fef993587633984cc5027dedba2464bcf00ddace26b40f8d9ce" ], - "version": "==1.1.0" + "index": "pypi", + "version": "==2.0.1" + }, + "sphinx-autodoc-typehints": { + "hashes": [ + "sha256:19fe0b426b7c008181f67f816060da7f046bd8a42723f67a685d26d875bcefd7", + "sha256:f9c06acfec80766fe8f542a6d6a042e751fcf6ce2e2711a7dc00d8b6daf8aa36" + ], + "index": "pypi", + "version": "==1.6.0" + }, + "sphinxcontrib-applehelp": { + "hashes": [ + "sha256:edaa0ab2b2bc74403149cb0209d6775c96de797dfd5b5e2a71981309efab3897", + "sha256:fb8dee85af95e5c30c91f10e7eb3c8967308518e0f7488a2828ef7bc191d0d5d" + ], + "version": "==1.0.1" + }, + "sphinxcontrib-devhelp": { + "hashes": [ + "sha256:6c64b077937330a9128a4da74586e8c2130262f014689b4b89e2d08ee7294a34", + "sha256:9512ecb00a2b0821a146736b39f7aeb90759834b07e81e8cc23a9c70bacb9981" + ], + "version": "==1.0.1" + }, + "sphinxcontrib-htmlhelp": { + "hashes": [ + "sha256:4670f99f8951bd78cd4ad2ab962f798f5618b17675c35c5ac3b2132a14ea8422", + "sha256:d4fd39a65a625c9df86d7fa8a2d9f3cd8299a3a4b15db63b50aac9e161d8eff7" + ], + "version": "==1.0.2" + }, + "sphinxcontrib-jsmath": { + "hashes": [ + "sha256:2ec2eaebfb78f3f2078e73666b1415417a116cc848b72e5172e596c871103178", + "sha256:a9925e4a4587247ed2191a22df5f6970656cb8ca2bd6284309578f2153e0c4b8" + ], + "version": "==1.0.1" + }, + "sphinxcontrib-qthelp": { + "hashes": [ + "sha256:513049b93031beb1f57d4daea74068a4feb77aa5630f856fcff2e50de14e9a20", + "sha256:79465ce11ae5694ff165becda529a600c754f4bc459778778c7017374d4d406f" + ], + "version": "==1.0.2" + }, + "sphinxcontrib-serializinghtml": { + "hashes": [ + "sha256:c0efb33f8052c04fd7a26c0a07f1678e8512e0faec19f4aa8f2473a8b81d5227", + "sha256:db6615af393650bf1151a6cd39120c29abaf93cc60db8c48eb2dddbfdc3a9768" + ], + "version": "==1.1.3" + }, + "typed-ast": { + "hashes": [ + "sha256:023625bfa9359e29bd6e24cac2a4503495b49761d48a5f1e38333fc4ac4d93fe", + "sha256:07591f7a5fdff50e2e566c4c1e9df545c75d21e27d98d18cb405727ed0ef329c", + 
"sha256:153e526b0f4ffbfada72d0bb5ffe8574ba02803d2f3a9c605c8cf99dfedd72a2", + "sha256:3ad2bdcd46a4a1518d7376e9f5016d17718a9ed3c6a3f09203d832f6c165de4a", + "sha256:3ea98c84df53ada97ee1c5159bb3bc784bd734231235a1ede14c8ae0775049f7", + "sha256:51a7141ccd076fa561af107cfb7a8b6d06a008d92451a1ac7e73149d18e9a827", + "sha256:52c93cd10e6c24e7ac97e8615da9f224fd75c61770515cb323316c30830ddb33", + "sha256:6344c84baeda3d7b33e157f0b292e4dd53d05ddb57a63f738178c01cac4635c9", + "sha256:64699ca1b3bd5070bdeb043e6d43bc1d0cebe08008548f4a6bee782b0ecce032", + "sha256:74903f2e56bbffe29282ef8a5487d207d10be0f8513b41aff787d954a4cf91c9", + "sha256:7891710dba83c29ee2bd51ecaa82f60f6bede40271af781110c08be134207bf2", + "sha256:91976c56224e26c256a0de0f76d2004ab885a29423737684b4f7ebdd2f46dde2", + "sha256:9bad678a576ecc71f25eba9f1e3fd8d01c28c12a2834850b458428b3e855f062", + "sha256:b4726339a4c180a8b6ad9d8b50d2b6dc247e1b79b38fe2290549c98e82e4fd15", + "sha256:ba36f6aa3f8933edf94ea35826daf92cbb3ec248b89eccdc053d4a815d285357", + "sha256:bbc96bde544fd19e9ef168e4dfa5c3dfe704bfa78128fa76f361d64d6b0f731a", + "sha256:c0c927f1e44469056f7f2dada266c79b577da378bbde3f6d2ada726d131e4824", + "sha256:c0f9a3708008aa59f560fa1bd22385e05b79b8e38e0721a15a8402b089243442", + "sha256:f0bf6f36ff9c5643004171f11d2fdc745aa3953c5aacf2536a0685db9ceb3fb1", + "sha256:f5be39a0146be663cbf210a4d95c3c58b2d7df7b043c9047c5448e358f0550a2", + "sha256:fcd198bf19d9213e5cbf2cde2b9ef20a9856e716f76f9476157f90ae6de06cc6" + ], + "version": "==1.2.0" }, "urllib3": { "hashes": [ - "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", - "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" + "sha256:b246607a25ac80bedac05c6f282e3cdaf3afb65420fd024ac94435cabe6e18d1", + "sha256:dbe59173209418ae49d485b87d1681aefa36252ee85884c31346debd19463232" ], - "version": "==1.22" + "index": "pypi", + "version": "==1.25.3" + }, + "wcwidth": { + "hashes": [ + 
"sha256:3df37372226d6e63e1b1e1eda15c594bca98a22d33a23832a90998faa96bc65e", + "sha256:f4ebe71925af7b40a864553f761ed559b43544f8f71746c2d756c7fe788ade7c" + ], + "version": "==0.1.7" }, "wrapt": { "hashes": [ - "sha256:d4d560d479f2c21e1b5443bbd15fe7ec4b37fe7e53d335d3b9b0a7b1226fe3c6" + "sha256:4aea003270831cceb8a90ff27c4031da6ead7ec1886023b80ce0dfe0adf61533" + ], + "version": "==1.11.1" + }, + "zipp": { + "hashes": [ + "sha256:8c1019c6aad13642199fbe458275ad6a84907634cc9f0989877ccc4a2840139d", + "sha256:ca943a7e809cc12257001ccfb99e3563da9af99d52f261725e96dfe0f9275bc3" ], - "version": "==1.10.11" + "version": "==0.5.1" } } } diff --git a/README.md b/README.md index f7ca11c..0d50630 100644 --- a/README.md +++ b/README.md @@ -1,37 +1,74 @@ # arXiv Submission -This repository houses exploratory development related to the arXiv-NG +This repository houses development related to the arXiv-NG submission system. See https://cul-it.github.io/arxiv-submission-core/ for the latest documentation. +## Documentation + +### Freshen/build + +Update the API doc source refs with: + +```bash +sphinx-apidoc -o docs/source/arxiv.submission -e -f -M --implicit-namespaces core/arxiv *test*/* +``` + +Build HTML docs with: + +```bash +cd docs +make html SPHINXBUILD=$(pipenv --venv)/bin/sphinx-build +``` + ## Contributions https://github.com/cul-it/arxiv-submission-core/blob/master/CONTRIBUTING.md ## What's in the repo -- The [events core package](core/) is provides integrations with the +- The [events core package](core/) provides integrations with the submission database and notification streams, and exposes a Python API for event-based operations on submission (meta)data. Any web services that modify submission data must do so via this package. -- The [API service](api/) provides the client-facing interface for +- The [submission agent](agent/) is a Kinesis consumer that orchestrates + backend processes based on rules triggered by submission events. 
+ + +### In progress/stale + +These components are considerably behind, or only partially complete. Future +development milestones will focus on these services, possibly in separate +repositories. + +- The [API service](metadata/) provides the client-facing interface for submission-related requests. **Status: In progress** - The [Webhooks service](webhooks/) provides an API for creating and managing submission-related webhooks. **Status: Schema only** -- The [Upload service](upload/) is a mock implementation of the file management - service, to be fully implemented elsewhere. **Status: Schema only** -- The [Compile service](compile/) is a mock implementation of the compilation - service, to be fully implemented elsewhere. **Status: Schema only** -- The [Authorization service](authorization/) mocks token-based authorization. - It handles sub-requests from the gateway to authorize client requests, and - mints encrypted JWTs for use by other services. - A toy [Gateway service](gateway/) provides a minimal NGINX server configured - to utilize the authorization service. It provides (proxy) access to - client-facing services, including the API service. - -This project is in its early stages, and has been subject to considerable -churn. As a consequence, test coverage, documentation, and verification are -incomplete. We will actively address these issues as we go along. + to utilize the authentication service (below). It provides (proxy) access to + client-facing services, including the API service. This is close (but not + identical) to what is run in production. + + +## Related components/dependencies + +- The [authentication + service](https://github.com/cul-it/arxiv-auth/tree/develop/authenticator) + handles sub-requests from the gateway to authorize client requests, and mints + encrypted JWTs for use by other services. +- The [client + registry](https://github.com/cul-it/arxiv-auth/tree/develop/registry) + provides OAuth2 workflows. 
Currently supports the `client_credentials` and + `authorization_code` grant types. +- The [file management service](https://github.com/cul-it/arxiv-filemanager) is + responsible for handling client/user uploads, and performing sanitization and + other QA checks. +- The [submission UI](https://github.com/cul-it/arxiv-submission-ui) provides a + form-driven UI for submission. The UI is built on top of the submission core + package (this repo). +- The [compiler service](https://github.com/cul-it/arxiv-compiler) is + responsible for compiling LaTeX sources to PDF, DVI, and other formats. ## Python dependencies @@ -48,7 +85,7 @@ To install dev/testing dependencies, use: $ pipenv install --dev ``` -## Local deployment (for testing only) +## Local deployment with Docker Compose (for testing only) A Compose file ([docker-compose.yml](docker-compose.yml)) is included in the root of this repository, and can be used to run the services in this project @@ -57,6 +94,8 @@ for local testing and development purposes. See the [Docker documentation](https://docs.docker.com/compose/) for information about using Docker Compose. +### Starting the service cluster + The Compose file included here deploys all services on a custom network, and exposes the gateway service at port 8000 on your local machine. @@ -114,386 +153,134 @@ submission-metadata | spawned uWSGI worker 7 (pid: 27, cores: 100) submission-metadata | spawned uWSGI worker 8 (pid: 28, cores: 100) ``` -At this point, you should be able to interact with the submission API. E.g. - -```bash -$ curl -i -X POST -H "Content-Type: application/json" -H "Authorization: Bearer as392lks0kk32" --data-binary "@metadata/examples/complete_submission.json" http://localhost:8000/submission/ -``` - - -### Authorization - -The toy authorization service simulates access token verification, e.g. after -an [OAuth2 authorization code grant](https://tools.ietf.org/html/rfc6749#section-4.1) -process.
The NGINX gateway expects an ``Authorization`` header with an -access token. For example: - -``Authorization: Bearer footoken1234`` - -If the token is valid, the authorizer replaces the access with a JWT that -encodes the identity of the client, the identity of the resource owner (end -user), and an authorization scope. - -Token ``as392lks0kk32`` has scope ``submission:write`` and ``submission:read``, -which should grant access to the entire submission API. - -Here's what the resulting JWT might look like: - -``` -eyJ0eXAiOiJKV1QiLCJhbGciOiJIUzI1NiJ9.eyJjbGllbnQiOiJmb29jbGllbnQiLCJ1c2VyIjoiZm9vdXNlciIsInNjb3BlIjpbInN1Ym1pc3Npb246d3JpdGUiLCJzdWJtaXNzaW9uOnJlYWQiXX0.253M954JUBpokfyP1CEHyk1-sn3Kk42Vyn9W1u59u08 -``` - -Token ``f0da9jso3l2m4`` has scope ``submission:read``, which should allow only -``GET`` requests to relevant endpoints. +### Creating a client -See ``submit/external.py`` for all of the available endpoints. +The ``arxiv/registry`` image provides a helper script to create a new API +client. -TODO: document endpoints here. +Be sure to change ``[NETWORK_NAME]`` to the actual name of the Docker network +that the service cluster is using. It should be +``[something]arxiv-submission-local``. E.g., it might be +``arxivsubmissioncore_arxiv-submission-local``. To be sure, run +``docker network ls | grep arxiv-submission-local``. -For example: - -``` -$ curl -I -H "Authorization: Bearer f0da9jso3l2m4" http://localhost:8000/submit/ -HTTP/1.1 200 OK -Server: nginx/1.13.8 -Date: Tue, 30 Jan 2018 20:06:47 GMT -Content-Type: application/json -Content-Length: 20 -Connection: keep-alive +```bash +docker run -it -e REGISTRY_DATABASE_URI=mysql+mysqldb://foouser:foopass@registry-maria:3306/registry?charset=utf8 --network=[NETWORK_NAME] arxiv/registry:0.1 -- python create_client.py ``` -But: +Follow the prompts to create your client.
For best results, use the default +scopes (just press enter): +```bash +Brief client name: client +Info URL for the client: http://client +What is it: a client +Space-delimited authorized scopes [public:read submission:create submission:update submission:read upload:create upload:update upload:read upload:read_logs]: +Redirect URI: http://localhost:1234/callback +Created client client with ID 2 and secret o86ScxuqcOffbWKxvyGke6e4wFTIukkjiJEc4ofBj7cDmNLz ``` -$ curl -I http://localhost:8000/submit/ -HTTP/1.1 403 Forbidden -Server: nginx/1.13.8 -Date: Tue, 30 Jan 2018 20:07:13 GMT -Content-Type: application/json -Content-Length: 32 -Connection: keep-alive -``` - - -# Submission API: Context -The arXiv submission API provides programmatic access to the arXiv submission -system for API consumers. +Note the client ID and secret at the end -- you'll need those. -## Submission Workflows +### Client credentials authorization -### Proxy Submission +In production, the submission API will require an authorization code +(three-legged authorization) grant. For local testing, however, it may be more +convenient to use client credentials. -Proxy submission is when an API client submits on behalf of an arXiv user who -has explicitly delegated authorization to the client. +Use your client ID and secret (above) to get an access token. -A client that wishes to perform proxy submission must have ``auth:3legged`` and -``submit:proxy`` scope, and must implement a secure three-legged OAuth -authorization process. +Include the header: ``Content-Type: application/x-www-form-urlencoded`` -In proxy submission, the arXiv user who has authorized the client to submit -on their behalf will be the primary owner of the submission. This allows the -user to intervene directly on the submission process later on, and provides -some flexibility to clients who may wish only to partially implement the -submission process. 
+Include the following parameters: -Note that in the classic arXiv system, "proxy submission" referred to bulk -deposit via the SWORDv1 API. +- ``grant_type`` -- This should be ``client_credentials`` +- ``client_id`` -- This should be your client ID +- ``client_secret`` -- This should be your client secret -### Bulk Submission - -Bulk submission is when an API client submits directly to arXiv without the -involvement of an arXiv user. Bulk submission may be appropriate for -conference proceedings or other large-volume deposits for which it is -impossible or impracticable to involve individual users. - -A client that wishes to perform bulk submission must have a ``submit:bulk`` -scope. +```bash +$ curl -XPOST -H "Content-Type: application/x-www-form-urlencoded" -d "grant_type=client_credentials&client_id=2&client_secret=o86ScxuqcOffbWKxvyGke6e4wFTIukkjiJEc4ofBj7cDmNLz" http://127.0.0.1:8000/api/token +{"access_token": "4tkstLJauH65EwpewmpJ0IugdqFLMctHiIjl5IvWxK", "expires_in": 864000, "token_type": "Bearer"} +``` -In bulk submission, the client is the primary owner of the submission. To -give ownership of the submission to an arXiv user, the client must take -explicit action to transfer ownership. +You should get back a JSON document with the following properties: -## Access & Authorization +- ``access_token`` -- That's your access token. +- ``expires_in`` -- Number of seconds until the token expires. +- ``token_type`` -- This should be ``"Bearer"``. -User of the submission API requires client credentials, which can be obtained -via the arXiv API Client Registry. See ... +### Create a submission -### Relevant Scopes -Ensure that your client credentials have been granted the necessary scopes for -your use-case. To request that missing scopes be added to your credentials, -see ... +At this point, you should be able to interact with the submission API. -- ``auth:3legged``: Required for proxy submission. -- ``submit:proxy``: Required for proxy submission. 
-- ``submit:bulk``: Required for bulk submission. +Be sure to include the headers: +- ``Content-Type: application/json`` -- That's because the submission metadata + API speaks JSON. +- ``Authorization: Bearer [your access token]`` -- If you don't include a valid + token, you'll get a 401 Unauthorized response. Or a 403, if you've really + been unruly. -### Two-legged Authorization -Two-legged authorization grants access to resources for which end-user -involvement is not required. This is suitable for bulk submission, but not -proxy submission. This authorization mechanism involves exchanging your -client id and client secret for an access token. +You can use the pre-baked submission in +``metadata/examples/complete_submission.json`` to get up and running. ```bash -$ curl -i -L \ -> -d "client_id=[ your client id ]" \ -> -d "client_secret=[ your client secret ]" \ -> -d "grant_type=client_credentials" \ -> "https://api.arxiv.org/auth/token" -{"access_token":"[ your access token ]","token_type":"bearer", -"refresh_token":"[ your refresh token ]","expires_in":3600} +$ curl -i -X POST -H "Content-Type: application/json" \ + -H "Authorization: Bearer 4tkstLJauH65EwpewmpJ0IugdqFLMctHiIjl5IvWxK" \ + --data-binary "@metadata/examples/complete_submission.json" http://localhost:8000/submission/ ``` -Use your access token in subsequent requests by passing it in the Authorization -header. For example: +But note that you can create a submission with far less than that! 
```bash -$ curl -i -L \ -> -H "Authorization: [ your access token ]" \ -> "https://api.arxiv.org/submit/" -``` +$ curl -i -X POST -H "Content-Type: application/json" \ + -H "Authorization: Bearer 4tkstLJauH65EwpewmpJ0IugdqFLMctHiIjl5IvWxK" \ + --data "{}" http://localhost:8000/submission/ -When your access token expires, you can request a new one with: +HTTP/1.1 201 CREATED +Server: nginx/1.13.12 +Date: Mon, 24 Sep 2018 19:47:33 GMT +Content-Type: application/json +Content-Length: 1027 +Location: http://localhost:8000/submission/7/ +Connection: keep-alive -```bash -$ curl -i -L \ -> -d "refresh_token=[ your refresh token ]" \ -> -d "grant_type=refresh_token" \ -> "https://api.arxiv.org/auth/token" -{"access_token":"[ your new access token ]","token_type":"bearer", -"refresh_token":"[ your new refresh token ]","expires_in":3600} +{"active":true,"arxiv_id":null,"client":{"client_id":"2"},"compilations":[],"created":"2018-09-24T19:47:33.251494","creator":{"affiliation":"","agent_type":"User","email":"","endorsements":["*.*"],"forename":"","identifier":null,"name":" ","native_id":null,"suffix":"","surname":"","user_id":null},"delegations":{},"finalized":false,"license":null,"metadata":{"abstract":null,"acm_class":null,"authors":[],"authors_display":"","comments":"","doi":null,"journal_ref":null,"msc_class":null,"report_num":null,"title":null},"owner":{"affiliation":"","agent_type":"User","email":"","endorsements":["*.*"],"forename":"","identifier":null,"name":" ","native_id":null,"suffix":"","surname":"","user_id":null},"primary_classification":null,"proxy":null,"announced":false,"secondary_classification":[],"source_content":null,"status":"working","submission_id":7,"submitter_accepts_policy":null,"submitter_confirmed_preview":false,"submitter_contact_verified":false,"submitter_is_author":null,"updated":"2018-09-24T19:47:33.251494"} ``` -### Three-legged Authorization -Three-legged authorization allows arXiv users to delegate API clients to take -actions on their 
behalf. This is required for proxy submission. Note that your -client credentials must have an associated ``auth:3legged`` scope, and you -must have entered a valid callback URI for your application. - -- Client initiates authorization by directing the user to the arXiv API - authorization endpoint: ``https://api.arxiv.org/auth/authorize?client_id=[ your client ID ]`` -- User is asked to log in and authorize your client. If the user does not - already have an arXiv account, they are given the option to create one at - this time, and then proceed with authorization. -- If the user authorizes your client, they will be redirected to your - registered callback URI. A short-lived authorization code will be included - as a GET parameter, e.g. ``https://yourapp.foo/callback?code=[ auth code ]`` -- Client may exchange the short-lived authorization code for a longer-lived - authorization token: +You can update a submission by POSTing fields that you want to update. ```bash -$ curl -i -L \ -> -d "client_id=[ your client id ]" \ -> -d "client_secret=[ your client secret ]" \ -> -d "code=[ your auth code ]" \ -> -d "grant_type=authorization_code" \ -> "https://api.arxiv.org/auth/token" -{"access_token":"[ your access token ]","token_type":"bearer", -"refresh_token":"[ your refresh token ]","expires_in":3600} -``` +$ curl -i -X POST -H "Content-Type: application/json" \ + -H "Authorization: Bearer 4tkstLJauH65EwpewmpJ0IugdqFLMctHiIjl5IvWxK" \ + --data '{"metadata":{"title":"The theory of life and everything","doi":"10.00123/foo45678"}}' \ + http://localhost:8000/submission/7/ -The authorization code may only be used once. Multiple attempts to exchange the -authorization code for an authorization token will invalidate both the -authorization code and the authorization token that was generated on the first -request. - -Use your authorization token in subsequent requests by passing it in the -Authorization header. 
For example: +HTTP/1.1 200 OK +Server: nginx/1.13.12 +Date: Mon, 24 Sep 2018 20:23:57 GMT +Content-Type: application/json +Content-Length: 1060 +Location: http://localhost:8000/submission/7/ +Connection: keep-alive -```bash -$ curl -i -L \ -> -H "Authorization: [ your access token ]" \ -> "https://api.arxiv.org/submit/" +{"active":true,"arxiv_id":null,"client":null,"compilations":[],"created":"2018-09-24T20:22:33.498688","creator":{"affiliation":"","agent_type":"User","email":"","endorsements":["*.*"],"forename":"","identifier":null,"name":" ","native_id":null,"suffix":"","surname":"","user_id":null},"delegations":{},"finalized":false,"license":null,"metadata":{"abstract":null,"acm_class":null,"authors":[],"authors_display":"","comments":"","doi":"10.00123/foo45678","journal_ref":null,"msc_class":null,"report_num":null,"title":"The theory of life and everything"},"owner":{"affiliation":"","agent_type":"User","email":"","endorsements":["*.*"],"forename":"","identifier":null,"name":" ","native_id":null,"suffix":"","surname":"","user_id":null},"primary_classification":null,"proxy":null,"announced":false,"secondary_classification":[],"source_content":null,"status":"working","submission_id":7,"submitter_accepts_policy":null,"submitter_confirmed_preview":false,"submitter_contact_verified":false,"submitter_is_author":null,"updated":"2018-09-24T20:23:57.754003"} ``` -## Endorsement - -Most subject areas in arXiv require that the submitter be endorsed by another -member of the scientific community. For more information about what endorsement -is and how it works on a per-user level, see... - -In addition to the required authorization scopes mentioned above, the API -client must usually also be granted an endorsement scope for the subject areas -to which it intends to submit. Endorsement scopes may be requested through the -arXiv API Client Registry; see ... 
- -Exception: in the case of proxy submission, the user on whose behalf the -client submits to arXiv may already be endorsed for a particular subject area. -If so, the client need not be endorsed for that subject area for the submission -to proceed. - -## Submission Overview - -The submission process is essentially the same for proxy and bulk submissions, -as ownership is inferred from the authorization token provided in each -request. - -### Create a new submission - -Submission is initiated upon creation of a new submission resource, by -POSTing to the ``/submission/`` endpoint. The submission resource need not be -complete at this time. See ... - -### Upload source - -The submission source package may then be added by PUTing the package (see -... ) to the source endpoint: ``/source/``. The response will include a -redirect to a status endpoint, e.g. ``/source/{upload_id}/``. The source -package will be sanitized and unpacked, which may take a little while, and the -status endpoint can be monitored for progress. - -Alternatively, a webhook may be configured to receive notifications about -source processing events. See ... - -#### Supported formats - -... - -#### Compilation - -**Note**: compilation applies to postscript and LaTeX submissions. PDF and -other submissions will skip this step. - -When a source package is uploaded, by default the arXiv submission system will -attempt to compile the source to PDF. Automatic compilation may be disabled, -e.g. to allow for a multi-step upload process. To trigger compilation, a POST -request may be made to the compilation endpoint: ``/compile/``. The response -will include a reference to a status endpoint that can be monitored for -progress; alternatively, a webhook may be configured to receive notifications -about compilation. - -If compilation is successful, the resulting PDF may be retrieved from: -``/compile/{compilation_id}/build/pdf/``. 
Compilation log output may be -retrieved from ``/compile/{compilation_id}/build/log/``. +You can finalize the submission by updating the ``finalized`` field to ``true``. +But if fields are missing, you'll get an error. -Note that the source must compile successfully for submission to proceed, and -the submission resource must be updated to confirm that the client/user is -satisfied with the compiled paper. It is up to the client whether/how such -confirmation should occur. - -### Update submission - -Updates to the submission may be made via subsequent POST requests to the -submission endpoint (``/submission/{id}/``). This allows the client to -spread the submission process over several steps, if desired. - -### External links - -External links may be attached to the submission by POSTing to the links -endpoint, ``/submission/{id}/links/``. This may be used to supplement the -core metadata with links to external resources, such as code, data, multimedia -content, or an URI for an alternate version of the paper (e.g. in a -peer-reviewed journal). See ... - -### Submit - -Once all required procedural and descriptive metadata have been added to the -submission, it may be submitted by POSTing to the submit endpoint: -``/submission/{id}/submit/``. See ... - -A client may register to receive updates about one or all submissions for which -it is responsible. To register a webhook for a specific submission, a POST -request may be made to ``/submission/{id}/webhooks/``. To register a webhook -for all submissions for which the client is responsible, a POST request may be -made to ``/webhooks/``. See ... - -### Publication - -Once the submission has been published, the submission will be updated with -its arXiv identifier and version number. If a webhook is registered, a -publication notification will also be issued. 
- -### Transfer ownership, delegate - -The client may transfer ownership of the submission to another agent (user or -another client) via the ``/submission/{id}/transfer/`` endpoint. Note that this -is non-reversible without intervention from the recipient. An alternative is to -delegate editing privileges (without relinquishing ownership) to another agent, -via the ``/submission/{id}/delegate/`` endpoint. See ... - -# arXiv Submission & Publication Process - -## arXiv Submissions & States - -An arXiv submission is comprised of a source package and a collection of -procedural and descriptive metadata. The source package is usually comprised of -a scientific paper (generally in LaTeX) and auxiliary resources (e.g. images, -tables, errata); see ... - -The primary objectives of the arXiv submission system are rapid dissemination -of scientific findings, and to support QA/QC workflows for arXiv's volunteer -moderators and the operations team. For a glimpse into how arXiv submissions -are processed on a daily basis, see [this recent blog -post](https://blogs.cornell.edu/arxiv/2018/01/19/a-day-in-the-life-of-the-arxiv-admin-team/). - -In support of rapid dissemination, a core requirement for the submission -system is that the daily publication/announcement process should continue -even in the absence of human intervention. In other words, if the moderation -and operations teams were disbanded tomorrow, arXiv would continue to accept -and disseminate publications as usual. - -At any given time, a submission will be in one of the states described below. - -It should be noted that in the arXiv-NG submission system these states are -defined in terms of the the data that describes the submission, **not** by a -flag in the database. 
- -![](../docs/_static/diagrams/submissionState.png) - -### Working +``` +$ curl -i -X POST -H "Content-Type: application/json" \ + -H "Authorization: Bearer 4tkstLJauH65EwpewmpJ0IugdqFLMctHiIjl5IvWxK" \ + --data '{"finalized": true}' \ + http://localhost:8000/submission/7/ + +HTTP/1.1 400 BAD REQUEST +Server: nginx/1.13.12 +Date: Mon, 24 Sep 2018 20:25:32 GMT +Content-Type: application/json +Content-Length: 62 +Connection: keep-alive -When the submission process is initiated, it generally lacks some of the -(meta)data and/or content required for publication. For example, the -submission process may be initiated by sending preliminary information for -only a few metadata fields, leaving the submission source package to be -uploaded separately. Several users and/or API clients may be involved in -contributing information about the submission. The source package must -compile to a publishable PDF before a submission can leave the working -state. - -### Processing - -Once a submission is finalized (ready for publication), it is subject to -a handful of automated QA/QC checks. For example, we need to be able to -extract plain text content from the compiled paper for subsequent checks. -Depending on the results of those checks, the submission may be bounced -back to the working state to correct problems. Generally, a submission -remains in the processing state for a very short period of time (seconds or -minutes). - -### Submitted - -If the preliminary checks pass, the submission is considered to be in the -submitted state. Automated checks for technical issues may also be applied -while the submission is in this state, and members of the moderation and -operations teams may inspect the paper for quality or to address issues -flagged by the technical checks. If a moderation flag is applied to the -submission during this process, the submission transitions to the **On -Hold** state (below). 
If no moderation or administrative flags are raised -on the submission, the submission will automatically transition to the -**Scheduled** state (below) at one of two cutoff times. - -### On Hold - -A submission in this state has been flagged by a moderator or by an -automated QA/QC process for potential problems. Submissions in this state -are usually inspected by the operations team, who may reach out to the -submission owner. If and when the issues with the submission are resolved, -an administrator will remove the blocking flags from the submission, and -the submission will return to the **Submitted** state. - -### Scheduled - -Any submissions in the **Submitted** state at the publication cut-off time -(currently 2PM ET) will be automatically scheduled for publication on the -same day (currently 8PM ET). Any remaining submissions in the **Submitted** -state at the next-day cutoff (currently 8PM ET) will be scheduled for -publication on the following day. - -### Published - -The automated publication process runs daily, currently at 8PM ET. Any -submissions scheduled for the current day will be updated with their -arXiv ID and version, and a publication timestamp. At that time, the -submission is considered **Published**. No further changes -may be made to a submission in this state. +{"reason":"Invalid Stack:\n\tMissing primary_classification"} +``` diff --git a/admin/README.md b/admin/README.md new file mode 100644 index 0000000..28657e8 --- /dev/null +++ b/admin/README.md @@ -0,0 +1,3 @@ +# Admin fragments + +This directory contains fragments related to admin quality assurance checks. 
diff --git a/admin/checks.py b/admin/checks.py new file mode 100644 index 0000000..bc4fdaf --- /dev/null +++ b/admin/checks.py @@ -0,0 +1,145 @@ +self._no_strange_characters(submission) +self._no_author_titles(submission) +self._starts_with_uppercase(submission) +self._balanced_brackets(submission) +if self.title.lower().startswith('title'): + raise InvalidEvent(self, "Must not start with `title`") +self._balanced_quotes(submission) +if self.title.startswith(' ') or self.title.endswith(' '): + raise InvalidEvent(self, "Must not start or end with spaces") +self._no_double_spaces(submission) +self._check_for_html(submission) +self._check_for_tex_junk(submission) +stylistic_checks(self, submission, self.title) + +def _no_strange_characters(self, submission: Submission) -> None: + """Check for odd characters.""" + for char in "#@": + if char in self.title: + raise InvalidEvent(self, f"Strange character: {char}") + +def _no_author_titles(self, submission: Submission) -> None: + """Check for author titles.""" + ptn = r"^\s*(prof|dr|professor|doctor|lecturer|mr)\s*$" + m = re.match(ptn, self.title) + if m: + raise InvalidEvent(self, f"Author title found: {m.group(1)}") + +def _starts_with_uppercase(self, submission: Submission) -> None: + """ + Verify that the title starts with an upper character. + + There are some exceptions to this rule, however... 
+ """ + if self.title[0].islower(): + exceptions = r"^(p\-adic$|de$|alpha|beta|gamma|phi|tau)" + if not re.match(exceptions, self.title): + raise InvalidEvent(self, + "Must not start with a lowercase character") + +def _balanced_brackets(self, submission: Submission) -> None: + """Curly brackets must be balanced, and not enclose the whole title.""" + if re.match(r"^[^\{]*\}", self.title): + raise InvalidEvent("Contains unbalanced }") + if re.match(r"\{[^\}]*$", self.title): + raise InvalidEvent("Contains unbalanced {") + if re.match(r"^\s*\{", self.title) and re.match(r"\}\s*$", self.title): + raise InvalidEvent("Must not be wrapped in brackets ({})") + +def _balanced_quotes(self, submission: Submission) -> None: + """Verify that quotation marks are balanced.""" + m = re.search(r"\`\`(.+)\'\'", self.title) + if m: + raise InvalidEvent(self, f"Unbalanced quotes around {m.group(1)}") + +def _no_double_spaces(self, submission: Submission) -> None: + """Verify that no double-spaces are present.""" + if re.search(r"[\s]{2,}", self.title): + raise InvalidEvent(self, "Contains multiple consecutive spaces") + +def _capitalized_words(self, submission: Submission) -> None: + """Check for unreasonable capitalization.""" + for word in self.title.split(): + if len(word) < 6: + continue + if word.isupper() and word not in ACCEPTABLE_CAPITALIZATIONS: + raise InvalidEvent(self, f"Excessive capitalization: {word}") + + + +def _check_for_tex_junk(self, submission: Submission) -> None: + """Check for TeX junk.""" + if re.match(r"^\s*\{.{,7}|.{,7}\}\s*$", self.title): + raise InvalidEvent(self, "Contains TeX junk") + + +def stylistic_checks(event: Event, submission: Submission, value: str) -> None: + """ + Apply a wide range of stylistic checks on ``value``. + + These are from arXiv::Submit::MetaCheck.absfix. + """ + # TODO: do we need to enforce chars in ([^\012\015\040-\177]) ? + PATTERNS = [ + # We don't like tildes in astronomical catalog identifiers. 
+ (r"(NGC|UGC|SN)~(\d+)", "Remove ~ between %s and %s"), + (r"(GRO)~", "Remove ~ after GRO"), + # Nor in front of citations. + (r"~(\\cite\{)", "Remove ~ in front of %s"), + # Nor in quantities. + (r"\b(\d+\}?\$?)~(MeV|GeV|TeV|keV|PeV|eV)\b", + "Remove ~ between %s and %s"), + # Check for TeX linebreaks. + (r"(\\\\)", "TeX linebreaks (%s) are not allowed"), + # Some other strange things we don't like. + (r"\\\/", "Remove \/"), + (r"\\([loO]) ", "Possible error, change to \{\\%s\}?"), + (r"\\([vcu]) ([A-Za-z])", "TeX accents with space: %s %s"), + (r"(\\[\'\"]\\[ij]) ", "Incorrect ij with acute etc. change: \{%s\}?"), + # Look for extraneous $. + (r"\$(d|s|p)\$-(wave|state)", f"unnecessary \$ change to %s-%s"), + # Check for tildes in some idiosyncratic cases... + (r"(Phys.~Rev.~Lett)", "Remove ~ from %s"), + (r"(de~Sitter)", "Remove ~ from %s"), + (r"\b(et~al)\b", "Remove ~ from %s"), + (r"(i\.e\.~)", "Remove ~ from %s"), + (r" ( e \. g \. ~ ) ", "Remove ~ from %s"), + # Abbreviation style... + (r"(\\ie\b)", "Change %s to i.e."), + (r"(\\eg\b)", "Change %s to e.g."), + (r"(\\etal\b)", "Change %s to et al"), + (r"R--parity", "Remove extra - from R-parity"), + (r"X--ray", "Remove extra - from X-ray"), + # What the heck are these all about? + (r"(^|\s)\*([\w\-]+)\*(\s|$)", "Remove ascii emphasis from %s%s%s"), + (r"\b(\w+)\.(STY|TEX)\b", "Change to \L%s.%s\E "), + (r"\$(-?\d+)\/([A-Z])\$", "Remove %s in front of %s"), + + # More weird TeXisms. + (r" (Large) - \$N \$", "Remove \$ after %s around N "), + (r"\\vert", "Convert vert to |"), + (r"(\\<)", "Remove tex tabbing: %s"), + (r"(\\>)", "Remove tex tabbing: %s"), + (r"\\overline", "Change overline to bar"), + (r"\\widehat", "Change widehat to hat"), + (r"\{\\mbox(\{[^\{\}]+\})\}", "Remove \{mbox ... \} from %s"), + # \, \; \ --> + (r"(\.\.\.\.)\\( |,|;)(\.\.\.\.)", "Unescape character: %s\\%s%s"), + (r"\`\`.*\"", "Inconsistent quotes"), + (r"\\lq\b", "Change lq to real quote"), + # Here are some common mipsellings... 
+ (r"\bpostcript", "postscript misspelled"), + (r"\bmissprint", "misprint misspelled"), + # More weird stuff. + (r"\\(left|right)\|", "remove %s in front of |"), + (r"\\\(", "Remove \ in front of ("), + (r"\\\)", "Remove \ in front of )"), + # Should not have a space before a subscript _ + (r"( +_)", "Remove space before subscript: %s"), + (r"\'\'", "Change two single quotes to double quotes") + ] + for pattern, message in PATTERNS: + match = re.search(pattern, value) + if match: + logger.error("Failed pattern `%s`: %s", pattern, value) + raise InvalidEvent(event, message % match.groups()) diff --git a/admin/util.py b/admin/util.py new file mode 100644 index 0000000..28ca367 --- /dev/null +++ b/admin/util.py @@ -0,0 +1,796 @@ +"""Helpers for :mod:`arxiv.submission.domain`.""" + +ACCEPTABLE_CAPITALIZATIONS = [ + "AADEBUG", + "AASLATEX", + "AASTEX", + "ABBOTT", + "ABLAUT", + "ABLHASAN", + "ABRIDGED", + "ACERBI", + "ADONIS", + "ADVANCES", + "AGLIETTI", + "AIHENP", + "AIKAWA", + "AIRAPT", + "AKIMOV", + "AKOROTKOV", + "ALADIN", + "ALAVAGNO", + "ALBERICO", + "ALBERTA", + "ALFONSO", + "ALISHAHIHA", + "ALLFRAME", + "AMANDA", + "AMSLATEX", + "AMSPPT", + "AMSSYM", + "AMSTEX", + "ANAHIT", + "ANAZAWA", + "ANCHORDOQUI", + "ANDERSEN", + "ANDREA", + "ANDREANI", + "ANDRIANOPOLI", + "ANDRIANOV", + "ANGELA", + "ANGELOW", + "ANTARES", + "ANTONOV", + "ANTONUCCIO", + "ANUATC", + "ARANJAN", + "ARISUE", + "ARKANI", + "ASCHIERI", + "ATHENA", + "AUGIER", + "AUTOFIB", + "AXODRAW", + "BACODINE", + "BAIKAL", + "BALAZS", + "BALLESTRERO", + "BALQSO", + "BANERJEE", + "BARBERO", + "BARBIERI", + "BARONE", + "BARRAT", + "BASSETTO", + "BAULIEU", + "BCHIBISOV", + "BECATTINI", + "BECCHI", + "BEHNIA", + "BELLUCCI", + "BERCEANU", + "BERERA", + "BEREZHIANI", + "BERGLIAFFA", + "BERING", + "BERNARDO", + "BERVILLIER", + "BGEARN", + "BHAGEN", + "BHARMS", + "BIANCHI", + "BIFERALE", + "BLANCA", + "BLESSING", + "BNUPHY", + "BONATSOS", + "BOOMER", + "BOOMERANG", + "BOREXINO", + "BOSTED", + "BRAHMS", + 
"BRIGNOLE", + "BUCHALLA", + "BULGAC", + "BUTURI", + "BVIJHK", + "BVRJHK", + "CABALLERO", + "CALVANI", + "CAMICI", + "CANGAROO", + "CAPEZZALI", + "CARACCIOLO", + "CAROLINE", + "CARONE", + "CARRETTA", + "CARRUTHERS", + "CARTER", + "CASCADE", + "CASELLE", + "CASPEC", + "CASSINI", + "CASSISI", + "CASTOR", + "CATELAN", + "CDMTCS", + "CEADRF", + "CELEBONOVIC", + "CELESTE", + "CENTER", + "CETDST", + "CHAMATI", + "CHANDRA", + "CHAVES", + "CHIEZE", + "CHORUS", + "CHOUDHURY", + "CHRVLZH", + "CICUTA", + "CIMENTO", + "CINVESTAV", + "CIOTTI", + "CIPANP", + "CIRIGLIANO", + "CITAMU", + "CLEMENT", + "CLOUDY", + "CMBFAST", + "CNELSON", + "COBRAS", + "COLDEA", + "COLING", + "COMPASS", + "COMPTEL", + "COMPTON", + "CONFERENCE", + "CONICET", + "CONNIE", + "CONTEXT", + "CORAVEL", + "CORBEL", + "CORNELL", + "CORSIKA", + "COSMION", + "COSMOS", + "COSPAR", + "CPLEAR", + "CRENSHAW", + "CSIC", + "CTSTATEU", + "CXXXIII", + "CXXXIV", + "CYCOMP", + "DANIEL", + "DAOPHOT", + "DAPHNE", + "DAPNIA", + "DAPNIS", + "DASWAM", + "DAWSON", + "DEBOER", + "DECNET", + "DELDUC", + "DELEOS", + "DELFINO", + "DELPHI", + "DEMARTEAU", + "DEMIRM", + "DESILVA", + "DHURJATI", + "DICKMAN", + "DIQUARKS", + "DIRECT", + "DISASTER", + "DLBBAT", + "DOMINICI", + "DONIZETI", + "DORIGO", + "DREINER", + "DRYUMA", + "DSYIBM", + "DURHAM", + "DYNAMIC", + "EDINBURGH", + "EETDST", + "EFFECTIVE", + "EINSTEIN", + "ELBDPF", + "ELEONORA", + "ELSART", + "ELSEVIER", + "ENSLAPP", + "ENVGRAM", + "EPIPHANY", + "EPSFIG", + "ERNEST", + "ESOMAT", + "ESPART", + "ESPOSITO", + "EUROGAM", + "EUROSPEECH", + "EXOSAT", + "EXOTIC", + "FAIRLIE", + "FARAONI", + "FARGION", + "FASTUS", + "FECLESC", + "FEDERICI", + "FERMILAB", + "FERRARAS", + "FERRARI", + "FEYNMAN", + "FIELDS", + "FINDER", + "FISIST", + "FLORIAN", + "FNLVOV", + "FNMILGRM", + "FNRINAT", + "FNUSOV", + "FORMATION", + "FORNENGO", + "FORTRAN", + "FRANCO", + "FRANZKOWSKI", + "FREIBURG", + "FREUDENREICH", + "FRITIOF", + "FSHEWB", + "FTLIPKIN", + "FUJITA", + "FUKUYAMA", + "FUNAKUBO", + 
"FURUKAWA", + "FUSION", + "GAILLARD", + "GALLAY", + "GALLEX", + "GANDENBERGER", + "GARBACZEWSKI", + "GASPERIN", + "GASPERINI", + "GBRUNETTI", + "GCARTER", + "GEBHARD", + "GELINA", + "GENESIS", + "GENIUS", + "GENOVESE", + "GENTLE", + "GEOLOGY", + "GEORGE", + "GERALDO", + "GERJIKOV", + "GIACOMO", + "GIAMARCHI", + "GISELLA", + "GISSEL", + "GIUDICE", + "GIUNTI", + "GIVEON", + "GODFREY", + "GOLDBERG", + "GOLDONI", + "GOLDSTEI", + "GONZALEZ", + "GORDON", + "GORSKI", + "GRAESSER", + "GRANAT", + "GRAPESPH", + "GRATUS", + "GRILLI", + "GRISARU", + "GRNLNDDT", + "GROCSE", + "GROJEAN", + "GROOTE", + "GRUNFELD", + "GUNDUC", + "GUNION", + "HADRON", + "HAEUSLER", + "HARADA", + "HARVARD", + "HASHIMOTO", + "HAXTON", + "HBCHPT", + "HDECAY", + "HEEMAN", + "HELION", + "HELIOS", + "HELLAS", + "HELLER", + "HELMUTH", + "HEMCGC", + "HERMES", + "HERRERA", + "HERWIG", + "HEWETT", + "HHUANG", + "HIGGSPV", + "HIJING", + "HILLHOUSE", + "HINDAWI", + "HIPPARCOS", + "HIRLAM", + "HIROTANI", + "HITOSHI", + "HKUPHYS", + "HOLLIK", + "HOMESTAKE", + "HORWITZ", + "HYLANDER", + "IASSNS", + "IAUNAM", + "ICARUS", + "ICASSP", + "ICGTMP", + "ICIMAF", + "ICPAQGP", + "ICSSUR", + "IFREMER", + "IFUNAM", + "IMEDEA", + "IMEUSP", + "IMPERIAL", + "IMPNWU", + "INFIQC", + "INFNCA", + "INFNCT", + "INFNFE", + "INFNNA", + "INFOCOM", + "INTEGRAL", + "INTERACTIONS", + "IRAPUAN", + "ISHEPP", + "ISHEPPXIII", + "ISHIDA", + "ISOCAM", + "ISOPHOT", + "ISOSHIMA", + "ISOSWS", + "ITESRE", + "ITHPII", + "IVANOV", + "JAKOBCZYK", + "JASKOLSKI", + "JETSET", + "JMORRIS", + "JMYANG", + "JNCASR", + "JPNYITP", + "JUNSHIN", + "JUSTINO", + "KACKKACHI", + "KAISER", + "KAKIGI", + "KALELKAR", + "KALOPER", + "KAMIOKANDE", + "KANAZAVA", + "KANAZAWA", + "KARADAYI", + "KARMEN", + "KASCADE", + "KAWAGUCHI", + "KAWAMATA", + "KHALIL", + "KHALLER", + "KHUDIAN", + "KIEVSKY", + "KIRTLEY", + "KITADA", + "KODAMA", + "KOERNER", + "KOHAMA", + "KOHANOFF", + "KOKADO", + "KOMABA", + "KORALW", + "KOTZINIAN", + "KOUNOH", + "KREIMER", + "KREUZER", + "KSUCNR", + 
"KULKARNI", + "KUMANO", + "KUPHSX", + "KURIKI", + "KUTSCHERA", + "KYOKUGEN", + "KYUSHU", + "LABASTIDA", + "LAFAGE", + "LAGEOS", + "LAMUPHYS", + "LANDAU", + "LARGER", + "LAROCHE", + "LATTICE", + "LAURQCD", + "LCLAVELL", + "LECHEMINANT", + "LEMONON", + "LEONARDO", + "LEPTON", + "LGONZALZ", + "LINERS", + "LINOBS", + "LONDON", + "LORENTZ", + "LPTENS", + "LUCLUS", + "LUDOVICI", + "LUKIERSKI", + "LYASSC", + "MAARTENS", + "MACHET", + "MAEZONO", + "MAGGIORA", + "MAILBOX", + "MAKEENKO", + "MALTONI", + "MANCINI", + "MANFRED", + "MANFREDINI", + "MANGANO", + "MARCINEK", + "MARCONI", + "MARKIEWIC", + "MARROCCHESI", + "MARTELLI", + "MARTIN", + "MARTIS", + "MASETTI", + "MASSIMO", + "MATERA", + "MATSUSHITA", + "MATTHIAS", + "MAXIMA", + "MAXWELL", + "MCDONALD", + "MEDVEDEV", + "MEGGIOLARO", + "MEIERH", + "MELEKHIN", + "MENOTTI", + "MERLIN", + "MERRITT", + "METDST", + "MHJENSEN", + "MICHAEL", + "MILAGRO", + "MILLENER", + "MILLER", + "MINERVA", + "MINSEOK", + "MIYAMA", + "MKLIMEK", + "MONTANARI", + "MORICONI", + "MOSAIC", + "MOSFET", + "MOSFETS", + "MOULAY", + "MOUROY", + "MPIPKS", + "MRENNA", + "MSUHEP", + "MSUPHY", + "MSWORD", + "MUELLER", + "MUKHERJEE", + "MUKOUYAMA", + "MUNEHISA", + "MURAKAMI", + "MURAYAMA", + "MURGIA", + "NAGAOSA", + "NAKADA", + "NAKAJIMA", + "NELSON", + "NEUTRINO", + "NICMOS", + "NIKHEF", + "NIRMOS", + "NOJIRI", + "NORDITA", + "NORMAL", + "NORMAN", + "NUMBERS", + "OBSERVATORY", + "OGREID", + "OHSTPY", + "OKUYAMA", + "OLEVIN", + "OOGURI", + "ORANGE", + "ORDINARY", + "OREGON", + "ORFEUS", + "ORIGINS", + "ORMAND", + "OSOSIRIS", + "OSWALD", + "OZDEMIR", + "OZERNOY", + "PADOVANI", + "PAPERS", + "PARADISE", + "PARMELA", + "PASCAL", + "PASCHKE", + "PASCOS", + "PASQUALE", + "PASQUINIM", + "PEGASE", + "PELAEZ", + "PELITI", + "PENDULUM", + "PHCHENG", + "PHENIX", + "PHIDIAS", + "PHOTON", + "PHYSIC", + "PHYSICS", + "PHYSMAS", + "PHYZZX", + "PICTEX", + "PIERCE", + "PLANCK", + "PLATANIA", + "POINCARE", + "POLDIS", + "POLONYI", + "POLRAD", + "POLYKARP", + "POMONA", + 
"POPOWICZ", + "POSTECH", + "POSTPICTEX", + "POSTSCRIPT", + "POTENT", + "PRADISI", + "PREPICTEX", + "PRINCETON", + "PROFILE", + "PROKHOROV", + "PROMETHEUS", + "PROPITIUS", + "PROVERO", + "PSEUDOGAP", + "PSTRICKS", + "PUMPLIN", + "PYTHIA", + "QCDINS", + "QFTHEP", + "QUIROS", + "RADCOR", + "RAHIMI", + "RANJITA", + "RAPGAP", + "RAQUEL", + "README", + "REGIME", + "REHFELD", + "RESCEU", + "REVIEW", + "REVTEX", + "RICCARDO", + "RICHERT", + "RIDOLFI", + "ROBERT", + "ROBERTO", + "ROGERIO", + "ROMANINO", + "ROSNER", + "ROUHANI", + "ROZANOV", + "RUDKOVSK", + "RUTGERS", + "RYUTHP", + "SABSNS", + "SACLAY", + "SACLIOGLU", + "SADHAN", + "SAGNOTTI", + "SAKKYO", + "SALASNICH", + "SAMINADAYAR", + "SAMPLE", + "SANITA", + "SAPHIR", + "SARIRA", + "SASAKI", + "SATURNE", + "SAUVAGE", + "SCHAPOSNIK", + "SCHNITTGER", + "SCHNITZER", + "SCHOLTEN", + "SCHOLTZ", + "SCHVELLINGER", + "SCIENCE", + "SCIUTO", + "SCREEN", + "SEMINARA", + "SERGIO", + "SETLUR", + "SHAOJING", + "SHARAPOV", + "SHEIKHJABBARI", + "SHELDON", + "SHIFMAN", + "SHIROKOV", + "SHWETKETU", + "SIGACT", + "SIGLEX", + "SIGPHON", + "SIGPLAN", + "SIGRAV", + "SILAFAE", + "SILVIO", + "SIMBAD", + "SIMULATOR", + "SINELSHCHIKOV", + "SISSAREF", + "SKICAT", + "SMBARR", + "SMERALDI", + "SMILGA", + "SMUHEP", + "SNBNCBS", + "SNUCTP", + "SOGANG", + "SOUBIRAN", + "SPAGNA", + "SPECTRUM", + "SPROCL", + "SRCSSM", + "STACEE", + "STAMENOV", + "STANEV", + "STARCK", + "STARNET", + "STEFANO", + "STIRAP", + "STOILOVA", + "STOJKOVIC", + "STRAMAGLIA", + "STRAUB", + "STREETS", + "STRIKMAN", + "STRINGS", + "STRONG", + "STRUMIA", + "SUBATECH", + "SUCHER", + "SUMMERS", + "SUNYSB", + "SUSLOV", + "SUSSEX", + "SUSYCP", + "SUSYLR", + "SUSYQM", + "SUZUKI", + "SYSTOLE", + "TAKASAKI", + "TAMBINI", + "TATSUNO", + "TAUPHY", + "TAURUS", + "TAVZUR", + "TAYLOR", + "TAYLORT", + "TCDMATH", + "TCHAPOVSKI", + "TCILATEX", + "TDASGUPTA", + "TECHNION", + "TEMPLE", + "TEORICA", + "TERAOMS", + "TESCHNER", + "TESFAYE", + "TEVATRON", + "TEXIER", + "TEYSSIER", + "THEMISTOCLE", + 
"THEOCHEM", + "THEORY", + "THIBON", + "TILLEY", + "TITARCHUK", + "TITCMT", + "TOISFTT", + "TORNQVIST", + "TORRES", + "TOYAMA", + "TPALEV", + "TRIESTE", + "TRISTAN", + "TRIUMF", + "TROSHIN", + "TSIKLAURI", + "TURBOC", + "TURLER", + "TURNER", + "UAM", + "UBGVRI", + "UBVRIJHK", + "UBVRIJHKL", + "UCDPHY", + "UCRHEP", + "UCSBTH", + "UDEHEP", + "UFSCAR", + "UICHEP", + "UMDEPP", + "UMIACS", + "UNDPDK", + "UNICAMP", + "UNIGRAZ", + "UPCMAT", + "UQMATH", + "URALTSEV", + "USTRON", + "UTEXAS", + "UVSFPFT", + "VALENCIA", + "VALTANCOLI", + "VANKOLCK", + "VASILEVICH", + "VERANO", + "VERBMOBIL", + "VERGASSOLA", + "VERITAS", + "VERSION", + "VESSELIN", + "VICENTE", + "VILLANTE", + "VIRM", + "VISHAL", + "VISITOR", + "VITALY", + "VIVIANE", + "VIVIANI", + "VIVITRON", + "VLASOV", + "VNIIMS", + "VOGELSANG", + "VOITIKOVA", + "VOLOSHIN", + "WALTER", + "WATPHY", + "WATPHYS", + "WEINBERG", + "WEIZMANN", + "WETDST", + "WGACAT", + "WITHOUT", + "WUGRAV", + "WUSTHOFF", + "WYFFOS", + "XXVIII", + "XXXIII", + "YAMADA", + "YAMAGATA", + "YAMAMOTO", + "YAMAWAKI", + "YELKHOVSKY", + "YERPHI", + "YOSHIOKA", + "YOUSUF", + "YUKALOV", + "YUKAWA", + "ZABRODIN", + "ZACHOS", + "ZALEWSKI", + "ZFITTER", + "ZHIROV", +] +"""Words of 6 characters or more that are known to be correctly capitalized.""" diff --git a/agent/agent/__init__.py b/agent/agent/__init__.py new file mode 100644 index 0000000..15664bb --- /dev/null +++ b/agent/agent/__init__.py @@ -0,0 +1,111 @@ +""" +Orchestrates backend processes based on rules triggered by submission events. + +The primary concerns of the agent are: + +- Orchestrating automated processes in support of submission and moderation. +- Keeping track of what processes have been carried out on a submission, and + the outcomes of those processes. +- Providing a framework for defining conditions under which processes should be + carried out. 
+ +In addition, we anticipate future development of: + +- Interfaces for administrators to monitor submission-related processes, and + to start processes manually. +- A metrics endpoint for `Prometheus <https://prometheus.io/>`_, to expose process performance/rates. +- Interfaces for administrators to define processing rules. + +Conceptual overview +=================== + +A **process** is a set of one or more related steps that should be carried out +in order, usually focusing on a single submission. Steps are small units of +work with a specific objective, such as getting a resource from a service or +applying a policy. If a step in a process fails, the subsequent steps are not +carried out. Examples of processes include running the autoclassifier and +annotating a submission with the results, and placing submissions on hold when +they exceed size limits. + +Processes are implemented by defining a class that inherits from +:class:`.Process`\. + +A **rule** defines the circumstances under which a process should be carried +out. Specifically, a rule is associated with a particular type of event, and a +function that determines whether the process should be carried out based on the +event properties and/or the state of the submission. + +Rules are implemented by instantiating :class:`.Rule` in :mod:`.rules`. + +An **event** is a specific mutation of a submission by an actor at a particular +point in time. See :mod:`arxiv.submission` for an overview of the event model +used in the submission system. + +Events are implemented by defining an :class:`.Event` class in +:mod:`arxiv.submission`, and emitted via :func:`arxiv.submission.core.save`. + +Architectural overview +====================== + +Context +------- +The agent operates within the scope of the submission system. + +.. _figure-submission-agent-context: + +.. figure:: _static/diagrams/submission-agent-context.png + :width: 600px + + System context for the arXiv submission agent. 
+ +The submission agent consumes submission events generated by other applications +running in the submission system, such as the submission UI, via the +``SubmissionEvents`` Kinesis stream. The agent uses the :mod:`arxiv.submission` +package to generate new events, which involves writing to the submission +database and putting records on the ``SubmissionEvents`` Kinesis stream. + +In carrying out processes, the agent makes requests to backend services in the +submission system, such as the plain text extraction service, file management +service, etc. + +Containers +---------- +The submission agent is comprised of four containers that are deployed and +scaled more or less independently. + +.. _figure-submission-agent-containers: + +.. figure:: _static/diagrams/submission-agent-containers.png + :width: 600px + + Containers within the arXiv submission agent. + + +The :mod:`agent.consumer` consumes notifications about events on the +``SubmissionEvents`` Kinesis stream. It is implemented on top of +:mod:`arxiv.integration.kinesis`. The agent consumes events on the stream one +at a time, in order, and keeps track of its progress by marking checkpoints in +a database. The agent also uses the database to record process-relevant +submission events. In the event that an agent process goes away, this allows +us to resume processing the stream while minimizing the amount of duplicated +work. The agent dispatches steps in triggered processes to be carried out by +the :mod:`agent.worker`. Only one agent process should run per shard to avoid +processing the same events more than once. + +The agent database is a MariaDB SQL database used by the consumer. It stores +checkpoints, process-relevant submission events, and (future) configurations +for user-defined rules. + +The :mod:`agent.worker` is a horizontally scalable `Celery +<https://docs.celeryq.dev/>`_ worker that carries out the steps of +processes. These tasks are dispatched by the :mod:`agent.consumer` via a +Redis in-memory key-value store. 
The worker is responsible for calling backend +services as it carries out its work. Worker processes can be scaled +horizontally independently of agent processes. +""" +from . import process, rules, runner, consumer, factory + + +# Prepare all Process types for asynchronous execution. +for process_type in process.Process.__subclasses__(): + runner.AsyncProcessRunner.prepare(process_type) diff --git a/agent/agent/alembic.ini b/agent/agent/alembic.ini new file mode 100644 index 0000000..d802203 --- /dev/null +++ b/agent/agent/alembic.ini @@ -0,0 +1,74 @@ +# A generic, single database configuration. + +[alembic] +# path to migration scripts +script_location = migrations + +# template used to generate migration files +# file_template = %%(rev)s_%%(slug)s + +# timezone to use when rendering the date +# within the migration file as well as the filename. +# string value is passed to dateutil.tz.gettz() +# leave blank for localtime +# timezone = + +# max length of characters to apply to the +# "slug" field +#truncate_slug_length = 40 + +# set to 'true' to run the environment during +# the 'revision' command, regardless of autogenerate +# revision_environment = false + +# set to 'true' to allow .pyc and .pyo files without +# a source .py file to be detected as revisions in the +# versions/ directory +# sourceless = false + +# version location specification; this defaults +# to alembic/versions. 
When using multiple version +# directories, initial revisions must be specified with --version-path +# version_locations = %(here)s/bar %(here)s/bat alembic/versions + +# the output encoding used when revision files +# are written from script.py.mako +# output_encoding = utf-8 + +sqlalchemy.url = driver://user:pass@localhost/dbname + + +# Logging configuration +[loggers] +keys = root,sqlalchemy,alembic + +[handlers] +keys = console + +[formatters] +keys = generic + +[logger_root] +level = WARN +handlers = console +qualname = + +[logger_sqlalchemy] +level = WARN +handlers = +qualname = sqlalchemy.engine + +[logger_alembic] +level = INFO +handlers = +qualname = alembic + +[handler_console] +class = StreamHandler +args = (sys.stderr,) +level = NOTSET +formatter = generic + +[formatter_generic] +format = %(levelname)-5.5s [%(name)s] %(message)s +datefmt = %H:%M:%S diff --git a/agent/agent/config.py b/agent/agent/config.py new file mode 100644 index 0000000..dcc76f2 --- /dev/null +++ b/agent/agent/config.py @@ -0,0 +1,520 @@ +"""Submission agent configuration parameters.""" + +from os import environ +import warnings +from kombu.serialization import register +from .serializer import dumps, loads + +NAMESPACE = environ.get('NAMESPACE') +"""Namespace in which this service is deployed; to qualify keys for secrets.""" + +LOGLEVEL = int(environ.get('LOGLEVEL', '20')) +""" +Logging verbosity. + +See `https://docs.python.org/3/library/logging.html#levels`_. 
+""" + +JWT_SECRET = environ.get('JWT_SECRET') +"""Secret key for signing + verifying authentication JWTs.""" + +if not JWT_SECRET: + warnings.warn('JWT_SECRET is not set; authn/z may not work correctly!') + +CORE_VERSION = "0.0.0" +"""Version of the :mod:`arxiv.submission` package.""" + +MAX_SAVE_RETRIES = 25 +"""Number of times to retry storing/emitting a submission event.""" + +DEFAULT_SAVE_RETRY_DELAY = 30 +"""Delay between retry attempts when storing/emitting a submission event.""" + +WAIT_FOR_SERVICES = bool(int(environ.get('WAIT_FOR_SERVICES', '0'))) +"""Disable/enable waiting for upstream services to be available on startup.""" +if not WAIT_FOR_SERVICES: + warnings.warn('Awaiting upstream services is disabled; this should' + ' probably be enabled in production.') + +WAIT_ON_STARTUP = int(environ.get('WAIT_ON_STARTUP', '0')) +"""Number of seconds to wait before checking upstream services on startup.""" + +ENABLE_CALLBACKS = bool(int(environ.get('ENABLE_CALLBACKS', '1'))) +"""Enable/disable the :func:`Event.bind` feature.""" + + +# --- VAULT INTEGRATION CONFIGURATION --- + +VAULT_ENABLED = bool(int(environ.get('VAULT_ENABLED', '0'))) +"""Enable/disable secret retrieval from Vault.""" + +if not VAULT_ENABLED: + warnings.warn('Vault integration is disabled') + +KUBE_TOKEN = environ.get('KUBE_TOKEN', 'fookubetoken') +"""Service account token for authenticating with Vault.
May be a file path.""" + +VAULT_HOST = environ.get('VAULT_HOST', 'foovaulthost') +"""Vault hostname/address.""" + +VAULT_PORT = environ.get('VAULT_PORT', '1234') +"""Vault API port.""" + +VAULT_ROLE = environ.get('VAULT_ROLE', 'submission-agent') +"""Vault role linked to this application's service account.""" + +VAULT_CERT = environ.get('VAULT_CERT') +"""Path to CA certificate for TLS verification when talking to Vault.""" + +VAULT_SCHEME = environ.get('VAULT_SCHEME', 'https') +"""Default is ``https``.""" + +if VAULT_ENABLED and VAULT_SCHEME != 'https': + warnings.warn('Vault is not configured to use TLS; this is not safe for' + ' production!') + +NS_AFFIX = '' if NAMESPACE == 'production' else f'-{NAMESPACE}' +VAULT_REQUESTS = [ + {'type': 'generic', + 'name': 'JWT_SECRET', + 'mount_point': f'secret{NS_AFFIX}/', + 'path': 'jwt', + 'key': 'jwt-secret', + 'minimum_ttl': 3600}, + {'type': 'generic', + 'name': 'SQLALCHEMY_DATABASE_URI', + 'mount_point': f'secret{NS_AFFIX}/', + 'path': 'beta-mysql', + 'key': 'uri', + 'minimum_ttl': 360000}, + {'type': 'aws', + 'name': 'AWS_S3_CREDENTIAL', + 'mount_point': f'aws{NS_AFFIX}/', + 'role': environ.get('VAULT_CREDENTIAL')}, + {'type': 'database', + 'engine': environ.get('AGENT_DATABASE_ENGINE', 'mysql+mysqldb'), + 'host': environ.get('AGENT_DATABASE_HOST', 'localhost'), + 'database': environ.get('AGENT_DATABASE_NAME', 'agent'), + 'params': 'charset=utf8mb4', + 'port': environ.get('AGENT_DATABASE_PORT', '3306'), + 'name': 'SUBMISSION_AGENT_DATABASE_URI', + 'mount_point': f'database{NS_AFFIX}/', + 'role': 'submission-agent-write'} +] +"""Requests for Vault secrets.""" + + +# --- DATABASE CONFIGURATION --- + +CLASSIC_DATABASE_URI = environ.get('CLASSIC_DATABASE_URI', 'sqlite:///') +"""Full database URI for the classic system.""" + +SQLALCHEMY_DATABASE_URI = CLASSIC_DATABASE_URI +"""Full database URI for the classic system.""" + +SQLALCHEMY_TRACK_MODIFICATIONS = False +"""Track modifications feature should always be disabled.""" 
+ +SUBMISSION_AGENT_DATABASE_URI = environ.get('SUBMISSION_AGENT_DATABASE_URI', + 'sqlite:///') +"""Full database URI for the agent checkpoint database.""" + +SQLALCHEMY_BINDS = {'agent': SUBMISSION_AGENT_DATABASE_URI} +""" +Binding for the agent checkpoint database. + +See `https://flask-sqlalchemy.palletsprojects.com/en/2.x/binds/`_. +""" + +# --- AWS CONFIGURATION --- + +AWS_ACCESS_KEY_ID = environ.get('AWS_ACCESS_KEY_ID', 'nope') +""" +Access key for requests to AWS services. + +If :const:`VAULT_ENABLED` is ``True``, this will be overwritten. +""" + +AWS_SECRET_ACCESS_KEY = environ.get('AWS_SECRET_ACCESS_KEY', 'nope') +""" +Secret auth key for requests to AWS services. + +If :const:`VAULT_ENABLED` is ``True``, this will be overwritten. +""" + +AWS_REGION = environ.get('AWS_REGION', 'us-east-1') +"""Default region for calling AWS services.""" + + +# --- KINESIS CONFIGURATION --- + +KINESIS_STREAM = environ.get("KINESIS_STREAM", "SubmissionEvents") +"""Name of the stream on which to produce and consume events.""" + +KINESIS_SHARD_ID = environ.get("KINESIS_SHARD_ID", "0") +""" +Shard ID for this agent instance. + +There must only be one agent process running per shard. +""" + +KINESIS_START_TYPE = environ.get("KINESIS_START_TYPE", "TRIM_HORIZON") +"""Start type to use when no checkpoint is available.""" + +KINESIS_ENDPOINT = environ.get("KINESIS_ENDPOINT", None) +""" +Alternate endpoint for connecting to Kinesis. + +If ``None``, uses the boto3 defaults for the :const:`AWS_REGION`. This is here +mainly to support development with localstack or other mocking frameworks. +""" + +KINESIS_VERIFY = bool(int(environ.get("KINESIS_VERIFY", "1"))) +""" +Enable/disable TLS certificate verification when connecting to Kinesis. + +This exists to support development with localstack or other mocking frameworks.
+""" + +if not KINESIS_VERIFY: + warnings.warn('Certificate verification for Kinesis is disabled; this' + ' should not be disabled in production.') + + +# --- CELERY CONFIGURATION --- + +BROKER_URL = environ.get('SUBMISSION_AGENT_BROKER_URL', + 'redis://localhost:6379/0') +"""The full URL for the task broker.""" + +RESULT_BACKEND = environ.get('SUBMISSION_AGENT_RESULT_BACKEND', BROKER_URL) +""" +The full URL for the result backend. + +Currently we use the same backend for both queuing and storing results. +""" + +QUEUE_NAME_PREFIX = environ.get('SUBMISSION_AGENT_QUEUE_NAME_PREFIX', + 'submission-agent-') +"""Used to differentiate our tasks from those of others on a shared broker.""" + +TASK_DEFAULT_QUEUE = 'submission-worker' + +PREFETCH_MULTIPLIER = int(environ.get( + 'SUBMISSION_AGENT_WORKER_PREFETCH_MULTIPLIER', + '1' +)) +"""Number of tasks to be fetched at once by each worker.""" + +TASK_ACKS_LATE = bool(int(environ.get('SUBMISSION_AGENT_TASK_ACKS_LATE', '1'))) +"""If True (default), tasks are acknowledged after they are completed.""" + +# Configure Celery to use our custom JSON serializer. +register('process-json', dumps, loads, + content_type='application/x-process-json', + content_encoding='utf-8') +CELERY_ACCEPT_CONTENT = ['process-json'] +"""Serialization formats supported by Celery.""" + +CELERY_TASK_SERIALIZER = 'process-json' +"""Serializer for Celery tasks.""" + +CELERY_RESULT_SERIALIZER = 'process-json' +"""Serializer for Celery results.""" + + +# --- UPSTREAM SERVICE INTEGRATIONS --- +# +# See https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables +# for details on service DNS and environment variables in k8s. + +# Integration with the file manager service.
+FILEMANAGER_HOST = environ.get('FILEMANAGER_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the filemanager service.""" + +FILEMANAGER_PORT = environ.get('FILEMANAGER_SERVICE_PORT', '443') +"""Port for the filemanager service.""" + +FILEMANAGER_PROTO = environ.get( + f'FILEMANAGER_PORT_{FILEMANAGER_PORT}_PROTO', + environ.get('FILEMANAGER_PROTO', 'https') +) +"""Protocol for the filemanager service.""" + +FILEMANAGER_PATH = environ.get('FILEMANAGER_PATH', '').lstrip('/') +"""Path at which the filemanager service is deployed.""" + +FILEMANAGER_ENDPOINT = environ.get( + 'FILEMANAGER_ENDPOINT', + '%s://%s:%s/%s' % (FILEMANAGER_PROTO, FILEMANAGER_HOST, + FILEMANAGER_PORT, FILEMANAGER_PATH) +) +""" +Full URL to the root filemanager service API endpoint. + +If not explicitly provided, this is composed from :const:`FILEMANAGER_HOST`, +:const:`FILEMANAGER_PORT`, :const:`FILEMANAGER_PROTO`, and +:const:`FILEMANAGER_PATH`. +""" + +FILEMANAGER_VERIFY = bool(int(environ.get('FILEMANAGER_VERIFY', '1'))) +"""Enable/disable SSL certificate verification for filemanager service.""" + +FILEMANAGER_STATUS_TIMEOUT \ + = float(environ.get('FILEMANAGER_STATUS_TIMEOUT', 1.0)) + +if FILEMANAGER_PROTO == 'https' and not FILEMANAGER_VERIFY: + warnings.warn('Certificate verification for filemanager is disabled; this' + ' should not be disabled in production.') + +# Integration with the compiler service.
+COMPILER_HOST = environ.get('COMPILER_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the compiler service.""" + +COMPILER_PORT = environ.get('COMPILER_SERVICE_PORT', '443') +"""Port for the compiler service.""" + +COMPILER_PROTO = environ.get( + f'COMPILER_PORT_{COMPILER_PORT}_PROTO', + environ.get('COMPILER_PROTO', 'https') +) +"""Protocol for the compiler service.""" + +COMPILER_PATH = environ.get('COMPILER_PATH', '') +"""Path at which the compiler service is deployed.""" + +COMPILER_ENDPOINT = environ.get( + 'COMPILER_ENDPOINT', + '%s://%s:%s/%s' % (COMPILER_PROTO, COMPILER_HOST, COMPILER_PORT, + COMPILER_PATH) +) +""" +Full URL to the root compiler service API endpoint. + +If not explicitly provided, this is composed from :const:`COMPILER_HOST`, +:const:`COMPILER_PORT`, :const:`COMPILER_PROTO`, and :const:`COMPILER_PATH`. +""" + +COMPILER_VERIFY = bool(int(environ.get('COMPILER_VERIFY', '1'))) +"""Enable/disable SSL certificate verification for compiler service.""" + +COMPILER_STATUS_TIMEOUT \ + = float(environ.get('COMPILER_STATUS_TIMEOUT', 1.0)) + +if COMPILER_PROTO == 'https' and not COMPILER_VERIFY: + warnings.warn('Certificate verification for compiler is disabled; this' + ' should not be disabled in production.') + +# Integration with the classifier service.
+CLASSIFIER_HOST = environ.get('CLASSIFIER_SERVICE_HOST', 'localhost') +"""Hostname or address of the classifier service.""" + +CLASSIFIER_PORT = environ.get('CLASSIFIER_SERVICE_PORT', '8000') +"""Port for the classifier service.""" + +CLASSIFIER_PROTO = environ.get( + f'CLASSIFIER_PORT_{CLASSIFIER_PORT}_PROTO', + environ.get('CLASSIFIER_PROTO', 'http') +) +"""Protocol for the classifier service.""" + +CLASSIFIER_PATH = environ.get('CLASSIFIER_PATH', '/classifier/') +"""Path at which the classifier service is deployed.""" + +CLASSIFIER_ENDPOINT = environ.get( + 'CLASSIFIER_ENDPOINT', + '%s://%s:%s/%s' % (CLASSIFIER_PROTO, CLASSIFIER_HOST, CLASSIFIER_PORT, + CLASSIFIER_PATH) +) +""" +Full URL to the root classifier service API endpoint. + +If not explicitly provided, this is composed from :const:`CLASSIFIER_HOST`, +:const:`CLASSIFIER_PORT`, :const:`CLASSIFIER_PROTO`, and +:const:`CLASSIFIER_PATH`. +""" + +CLASSIFIER_VERIFY = bool(int(environ.get('CLASSIFIER_VERIFY', '0'))) +"""Enable/disable SSL certificate verification for classifier service.""" + +CLASSIFIER_STATUS_TIMEOUT \ + = float(environ.get('CLASSIFIER_STATUS_TIMEOUT', 1.0)) + +if CLASSIFIER_PROTO == 'https' and not CLASSIFIER_VERIFY: + warnings.warn('Certificate verification for classifier is disabled; this' + ' should not be disabled in production.') + +# Integration with plaintext extraction service.
+PLAINTEXT_HOST = environ.get('PLAINTEXT_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the plaintext extraction service.""" + +PLAINTEXT_PORT = environ.get('PLAINTEXT_SERVICE_PORT', '443') +"""Port for the plaintext extraction service.""" + +PLAINTEXT_PROTO = environ.get( + f'PLAINTEXT_PORT_{PLAINTEXT_PORT}_PROTO', + environ.get('PLAINTEXT_PROTO', 'https') +) +"""Protocol for the plaintext extraction service.""" + +PLAINTEXT_PATH = environ.get('PLAINTEXT_PATH', '') +"""Path at which the plaintext extraction service is deployed.""" + +PLAINTEXT_ENDPOINT = environ.get( + 'PLAINTEXT_ENDPOINT', + '%s://%s:%s/%s' % (PLAINTEXT_PROTO, PLAINTEXT_HOST, PLAINTEXT_PORT, + PLAINTEXT_PATH) +) +""" +Full URL to the root plaintext extraction service API endpoint. + +If not explicitly provided, this is composed from :const:`PLAINTEXT_HOST`, +:const:`PLAINTEXT_PORT`, :const:`PLAINTEXT_PROTO`, and :const:`PLAINTEXT_PATH`. +""" + +PLAINTEXT_VERIFY = bool(int(environ.get('PLAINTEXT_VERIFY', '1'))) +"""Enable/disable certificate verification for plaintext extraction service.""" + +PLAINTEXT_STATUS_TIMEOUT \ + = float(environ.get('PLAINTEXT_STATUS_TIMEOUT', 1.0)) + +if PLAINTEXT_PROTO == 'https' and not PLAINTEXT_VERIFY: + warnings.warn('Certificate verification for plaintext extraction service' + ' is disabled; this should not be disabled in production.') + +# Email notification configuration. +EMAIL_ENABLED = bool(int(environ.get('EMAIL_ENABLED', '1'))) +"""Enable/disable sending e-mail.
Default is enabled (True).""" + +DEFAULT_SENDER = environ.get('DEFAULT_SENDER', 'noreply@arxiv.org') +"""Default sender address for e-mail.""" + +SUPPORT_EMAIL = environ.get('SUPPORT_EMAIL', "help@arxiv.org") +"""E-mail address for user support.""" + +SMTP_HOSTNAME = environ.get('SMTP_HOSTNAME', 'localhost') +"""Hostname for the SMTP server.""" + +SMTP_USERNAME = environ.get('SMTP_USERNAME', 'foouser') +"""Username for the SMTP server.""" + +SMTP_PASSWORD = environ.get('SMTP_PASSWORD', 'foopass') +"""Password for the SMTP server.""" + +SMTP_PORT = int(environ.get('SMTP_PORT', '0')) +"""SMTP service port.""" + +SMTP_LOCAL_HOSTNAME = environ.get('SMTP_LOCAL_HOSTNAME', None) +"""Local host name to include in SMTP request.""" + +SMTP_SSL = bool(int(environ.get('SMTP_SSL', '0'))) +"""Enable/disable SSL for SMTP. Default is disabled.""" + +if not SMTP_SSL: + warnings.warn('Certificate verification for SMTP is disabled; this' + ' should not be disabled in production.') + + +# --- URL GENERATION --- + +EXTERNAL_URL_SCHEME = environ.get('EXTERNAL_URL_SCHEME', 'https') +"""Scheme to use for external URLs.""" + +if EXTERNAL_URL_SCHEME != 'https': + warnings.warn('External URLs will not use HTTPS proto') + +BASE_SERVER = environ.get('BASE_SERVER', 'arxiv.org') +"""Base arXiv server.""" + +SERVER_NAME = environ.get('SERVER_NAME', "submit.arxiv.org") +"""The name of this server.""" + +URLS = [ + ("submission", "/", SERVER_NAME), + ("confirmation", "//confirmation", SERVER_NAME) +] +""" +URLs for external services, for use with :func:`flask.url_for`. + +This subset of URLs is common only within submit, for now - maybe move to base +if these pages seem relevant to other services. + +For details, see :mod:`arxiv.base.urls`. +""" + + +# --- CONFIGURATION FOR SUBMISSION POLICIES --- +# +# The following parameters were carried forward from the legacy system. 
In +# future versions, these should be stored in a configuration database that can +# be directly altered via administrative interfaces. +# +# Not all of these parameters may be directly used right now. + +# TODO: make this configurable +RECLASSIFY_PROPOSAL_THRESHOLD = 0.57 # Equiv. to logodds of 0.3. +"""This is the threshold for generating a proposal from a classifier result.""" + +# TODO: make this configurable. +LOW_STOP_PERCENT = 0.10 +"""This is the threshold for abnormally low stopword content by percentage.""" + +LOW_STOP = 400 +"""This is the threshold for abnormally low stopword content by count.""" + +HIGH_STOP_PERCENT = 0.30 +"""This is the threshold for abnormally high stopword content by percentage.""" + +MULTIPLE_LIMIT = 1.01 +"""Threshold for repeated text.""" + +LINENOS_LIMIT = 0 +"""Threshold for line numbers.""" + +TITLE_SIMILARITY_WINDOW = 3*365/12 # days +"""Number of days in the past to look for similar titles.""" + +TITLE_SIMILARITY_THRESHOLD = 0.7 +"""Jaccard similarity threshold for title similarity.""" + +METADATA_ASCII_THRESHOLD = 0.5 +"""Minimum ASCII content for titles and abstracts (0.-1.).""" + +COMPRESSED_PACKAGE_MAX_BYTES = 6_000_000 +"""Maximum size of a source package in bytes when compressed.""" + +UNCOMPRESSED_PACKAGE_MAX_BYTES = 18_000_000 +"""Maximum size of a source package in bytes when uncompressed.""" + +PDF_LIMIT_BYTES = 15_000_000 +"""The maximum size in bytes of the provided/compiled PDF.""" + +NO_RECLASSIFY_CATEGORIES = ( + 'cs.CE', # Interdisciplinary category (see ARXIVOPS-466). +) +""" +Don't make auto-proposals for these user-supplied primary categories. + +These categories may not be known to the classifier, or the +classifier-suggested alternatives may be consistently inaccurate. +""" + +NO_RECLASSIFY_ARCHIVES = ( + 'econ', # New September 2017. +) +""" +Don't make auto-proposals for these user-supplied primary archives.
+ +These categories may not be known to the classifier, or the +classifier-suggested alternatives may be consistently inaccurate. +""" + +AUTO_CROSS_FOR_PRIMARY = { + 'cs.LG': 'stat.ML', + 'stat.ML': 'cs.LG' +} +""" +When these categories are the primary, a corresponding cross will be suggested. + +Per ARXIVOPS-500. +""" diff --git a/agent/agent/consumer.py b/agent/agent/consumer.py new file mode 100644 index 0000000..0784ebb --- /dev/null +++ b/agent/agent/consumer.py @@ -0,0 +1,337 @@ +""" +Submission event consumer. + +The submission event consumer is responsible for monitoring submission events, +evaluating them against pre-defined rules, and triggering processes to be +carried out by the :mod:`agent.worker`. + +The consumer is implemented using :mod:`arxiv.integration.kinesis`, and +consumes events in the ``SubmissionEvents`` Kinesis stream. Events may +be generated by submission user interfaces, APIs, and backend components +that leverage the ``arxiv.submission`` core package. As events are consumed, +they are evaluated against a set of registered :class:`.Rule` instances, which +map event types and conditions to :class:`.Process` classes. + +Consequent processes are not run in the consumer application, which is run as a +single thread. The consumer tries to move on as quickly as possible, so it uses +the :class:`.AsyncProcessRunner` to dispatch processes for parallel execution +by the worker. + +The event lifecycle from the perspective of the consumer looks like this: + +1. A command/event is generated by a submission service, using the + :mod:`arxiv.submission` package. The event is stored in the database, and + propagated via the ``SubmissionEvents`` Kinesis stream. The Kinesis payload + includes the event itself, and the state of the submission both before and + after the event was applied. +2. The event is consumed by the agent via the ``SubmissionEvents`` Kinesis + stream. +3.
The agent evaluates the event against registered :class:`.Rule` instances, + using :func:`.rules.evaluate`. A :class:`.Rule` maps a condition (the event + type and event/submission properties) to a :class:`.Process`. +4. The agent dispatches any triggered :class:`.Process` instances to the + :mod:`agent.worker` using the :class:`.AsyncProcessRunner`. + + +Components +---------- + +.. _figure-submission-agent-consumer-components: + +.. figure:: _static/diagrams/submission-agent-consumer-components.png + :width: 600px + + Main components of the event consumer. + + +The :class:`SubmissionEventConsumer` defines how records from the +``SubmissionEvents`` stream are handled. This is the primary point of control +in the agent. As events are received, it relies on :mod:`agent.rules` to +determine what processes to carry out, and then dispatches those processes +to the :mod:`agent.worker` using the :class:`.AsyncProcessRunner`. + +The :class:`SubmissionEventConsumer` relies on the +:class:`.DatabaseCheckpointManager` to keep track of its progress in the +``SubmissionEvents`` stream. + +The :mod:`agent.services.database` integration module provides access to the +agent database. Specifically, it supports creating and loading checkpoints, +and storing information about process-relevant events. + +Processes are defined in :mod:`agent.process`. Each process is a subclass of +:class:`.Process`, and may have one or more steps. + +Rules are defined in :mod:`agent.rules`. Each rule is an instantiation of +:class:`.Rule` in the root of that module. It relies on the event types +defined in :mod:`arxiv.submission.domain.event`, and the processes defined in +:mod:`agent.process`. + +:mod:`agent.runner` provides tools for running :class:`.Process` instances. The +base :class:`.ProcessRunner` carries out the process in a single thread, which +may be useful for testing purposes.
The runner used in production is the +:class:`.AsyncProcessRunner`, which manages registration and dispatching of +asynchronous tasks carried out by the :mod:`agent.worker`. + +""" + +import json +import os +import time +from typing import List, Any, Optional, Dict, Tuple, Union + +from flask import Flask +from retry import retry + +import boto3 +from botocore.exceptions import WaiterError, NoCredentialsError, \ + PartialCredentialsError, BotoCoreError, ClientError + +from arxiv.base import logging +from arxiv.integration.kinesis import consumer +from arxiv.vault.manager import ConfigManager +from arxiv.submission.serializer import loads +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.event import Event, AddProcessStatus + +from . import rules +from .services import database +from .factory import create_app +from .domain import Trigger +from .runner import AsyncProcessRunner +from .process import Process + +logger = logging.getLogger(__name__) +logger.propagate = False + + +class SubmissionEventConsumer(consumer.BaseConsumer): + """ + Consumes submission events, and dispatches processes based on rules. + + .. todo: + ARXIVNG-2041 Implement throttling control in base Kinesis integration + + """ + + sleep = 0.2 + sleep_after_credentials = 10 + + def __init__(self, *args: Any, config: Dict[str, Any] = {}, + **kwargs: Any) -> None: + """Initialize a secrets manager before starting.""" + self._config = config + self._app: Optional[Flask] = kwargs.pop('app', None) + super(SubmissionEventConsumer, self).__init__(*args, **kwargs) + if self._config.get('VAULT_ENABLED'): + logger.info('Vault enabled; getting secrets') + self._secrets = self._init_secrets() + self.update_secrets() + self._access_key = self._config.get('AWS_ACCESS_KEY_ID') + self._secret_key = self._config.get('AWS_SECRET_ACCESS_KEY') + + def _init_secrets(self) -> ConfigManager: + """ + Get a :class:`.ConfigManager` for secrets. 
+ + If we have a Flask app, try to re-use an existing ConfigManager if + there is one available in the middlewares. + """ + if self._app is not None: + if 'VaultMiddleware' in self._app.middlewares: + return self._app.middlewares['VaultMiddleware'].secrets + return ConfigManager(self._config) + + def update_secrets(self) -> bool: + """Update any secrets that are out of date.""" + for key, value in self._secrets.yield_secrets(): + self._config[key] = value + os.environ[key] = str(value) + _access_key = self._config.get('AWS_ACCESS_KEY_ID') + _secret_key = self._config.get('AWS_SECRET_ACCESS_KEY') + if self._access_key != _access_key or self._secret_key != _secret_key: + self._access_key = _access_key + self._secret_key = _secret_key + return True + return False + + def process_records(self, start: str) -> Tuple[str, int]: + """Update secrets before getting a new batch of records.""" + if self._config.get('VAULT_ENABLED') and self.update_secrets(): + logger.info('Got new secrets; restarting after %i seconds', + self.sleep_after_credentials) + # From the docs: + # + # > Unfortunately, IAM credentials are eventually consistent with + # > respect to other Amazon services. If you are planning on using + # > these credential in a pipeline, you may need to add a delay of + # > 5-10 seconds (or more) after fetching credentials before they + # > can be used successfully. + # -- https://www.vaultproject.io/docs/secrets/aws/index.html#usage + time.sleep(self.sleep_after_credentials) + raise consumer.RestartProcessing('Got fresh credentials') + super_ret: Tuple[str, int] + super_ret = super(SubmissionEventConsumer, self).process_records(start) + return super_ret + + def process_record(self, record: dict) -> None: + """ + Evaluate an event against registered rules. 
+ + Parameters + ---------- + record : dict + Kinesis record. ``Data`` holds the serialized payload (the event + plus the submission state before and after it); ``SequenceNumber`` + is used to identify the record in log messages. + + """ + logger.info(f'Processing record %s', record["SequenceNumber"]) + try: + data = loads(record['Data'].decode('utf-8')) + except json.decoder.JSONDecodeError as exc: + logger.error("Error (%s) while deserializing from data %s", + exc, record['Data']) + raise exc + + # It is possible that an incomplete or aberrant record will come + # through the stream. One example is the generation of a test + # notification that other services might use to verify their ability + # to write to the stream. + try: + event, before, after = data['event'], data['before'], data['after'] + except KeyError: + logger.info('Skipping record %s', record["SequenceNumber"]) + return + + # We want to keep track of process-related events, so that we can + # reconstruct what happened if necessary. + if type(event) is AddProcessStatus: + self._store_event(event) + + # rules.evaluate() will yield any processes and corresponding + # configuration parameters that are triggered by matching rules.
+ logger.debug('Evaluating event %s', event.event_id) + for process, params in rules.evaluate(event, before, after): + self._dispatch_process(process, params, event, before, after) + logger.debug('Done processing record %s', record["SequenceNumber"]) + + @retry(backoff=2, jitter=(0, 1), logger=logger) + def _store_event(self, event: AddProcessStatus) -> None: + logger.debug('Storing event %s', event) + database.store_event(event) + logger.debug('..stored.') + + @retry(backoff=2, jitter=(0, 1), logger=logger) + def _dispatch_process(self, process: Process, params: Dict[str, Any], + event: Event, before: Submission, + after: Submission) -> None: + trigger = Trigger(event=event, before=before, after=after, + actor=event.creator, params=params) + + logger.debug('starting process %s', process.name) + runner = AsyncProcessRunner(process) + runner.run(trigger) + logger.info('Event %s on submission %s caused %s with params %s', + event.event_id, event.submission_id, process.name, + params) + + def new_client(self) -> boto3.client: + """Generate a new Kinesis client.""" + params: Dict[str, Any] = {'region_name': self.region, + 'aws_access_key_id': self._access_key, + 'aws_secret_access_key': self._secret_key} + client_params: Dict[str, Any] = {} + if self.endpoint: + client_params['endpoint_url'] = self.endpoint + if self.verify is False: + client_params['verify'] = False + + logger.debug('New session with parameters: %s', params) + # We don't want to let boto3 manage the Session for us. + self._session = boto3.Session(**params) + + return self._session.client('kinesis', **client_params) + + def wait_for_stream(self, tries: int = 5, delay: int = 5, + max_delay: Optional[int] = None, backoff: int = 2, + jitter: Union[int, Tuple[int, int]] = 0) -> None: + """ + Wait for the stream to become available. + + If the stream becomes available, returns ``None``. Otherwise, raises + a :class:`.StreamNotAvailable` exception. 
+ + Raises + ------ + :class:`.StreamNotAvailable` + Raised when the stream could not be reached. + + """ + waiter = self.client.get_waiter('stream_exists') + try: + logger.info(f'Waiting for stream {self.stream_name}') + waiter.wait( + StreamName=self.stream_name, + Limit=1, + ExclusiveStartShardId=self.shard_id + ) + except WaiterError as e: + msg = 'Failed to get stream while waiting' + logger.error(msg) + raise consumer.exceptions.StreamNotAvailable(msg) from e + except (PartialCredentialsError, NoCredentialsError) as e: + msg = 'Credentials missing or incomplete: %s' + logger.error(msg, e.msg) + raise consumer.exceptions.ConfigurationError(msg % e.msg) from e + logger.info('Done waiting') + + +class DatabaseCheckpointManager: + """Provides db-backed loading and updating of consumer checkpoints.""" + + def __init__(self, shard_id: str) -> None: + """Get the last checkpoint.""" + self.shard_id = shard_id + self.position = database.get_latest_position(self.shard_id) + + def checkpoint(self, position: str) -> None: + """Checkpoint at ``position``.""" + try: + database.store_position(position, self.shard_id) + self.position = position + except Exception as e: + raise consumer.CheckpointError('Could not checkpoint') from e + + +def process_stream(app: Flask, duration: Optional[int] = None) -> None: + """ + Configure and run the record processor. + + Parameters + ---------- + duration : int + Time (in seconds) to run record processing. If None (default), will + run "forever". + + """ + # We use the Flask application instance for configuration, and to manage + # integrations with metadata service, search index. 
+ checkpointer = DatabaseCheckpointManager(app.config['KINESIS_SHARD_ID']) + consumer.process_stream(SubmissionEventConsumer, app.config, + checkpointmanager=checkpointer, + duration=duration, + extra=dict(app=app, config=app.config)) + + +def start_agent() -> None: + """Start the record processor.""" + app = create_app() + with app.app_context(): + database.await_connection() + if not database.tables_exist(): + database.create_all() + process_stream(app) + + +if __name__ == '__main__': + start_agent() diff --git a/agent/agent/domain.py b/agent/agent/domain.py new file mode 100644 index 0000000..10c4dcd --- /dev/null +++ b/agent/agent/domain.py @@ -0,0 +1,78 @@ +from typing import Any, Optional, List, Dict +from dataclasses import dataclass, field + +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.event import Event, event_factory +from arxiv.submission.domain.agent import Agent, agent_factory + + +@dataclass +class Trigger: + """ + Represents a trigger for a process. + + This will usually be an :class:`.Event`, but may also be directly triggered + by an actor (e.g. manually starting a process via a UI). + """ + + event: Optional[Event] = field(default=None) + """The event (if any) that triggered the process.""" + before: Optional[Submission] = field(default=None) + """The state of the submission prior to the :attr:`.event` (if any).""" + after: Optional[Submission] = field(default=None) + """ + The state of the submission after the :attr:`.event` (if any). + + If the process was triggered directly by an :attr:`.actor`, this should + be the state of the submission at the time the process was triggered.
+ """ + actor: Optional[Agent] = field(default=None) + """The actor (if any) responsible for starting the process directly.""" + params: Dict[str, Any] = field(default_factory=dict) + """Configuration parameters for the process.""" + + def __post_init__(self) -> None: + """Make sure that all refs are domain objects.""" + if self.event and not isinstance(self.event, Event): + self.event = event_factory(**self.event) + if self.before and not isinstance(self.before, Submission): + self.before = Submission(**self.before) + if self.after and not isinstance(self.after, Submission): + self.after = Submission(**self.after) + if self.actor and not isinstance(self.actor, Agent): + self.actor = agent_factory(**self.actor) + + +@dataclass +class ProcessData: + """ + Represents data associated with a (possibly multi-step) process. + + As steps are completed, their return values are appended to + :attr:`.results`. + """ + + submission_id: int + """Identifier of the submission upon which the process is operating.""" + + process_id: str + """Unique identifier of a specific process instance.""" + + trigger: Trigger + """The original trigger condition for the process.""" + + results: List[Any] + """The results of each step in the process, in order.""" + + def __post_init__(self): + """Make sure that all refs are domain objects.""" + if not isinstance(self.trigger, Trigger): + self.trigger = Trigger(**self.trigger) + + def get_last_result(self) -> Any: + """Get the result of the most recent successful step.""" + return self.results[-1] + + def add_result(self, result: Any) -> None: + """Add a result from a successful step.""" + self.results.append(result) diff --git a/agent/agent/factory.py b/agent/agent/factory.py new file mode 100644 index 0000000..82c899b --- /dev/null +++ b/agent/agent/factory.py @@ -0,0 +1,97 @@ +from typing import Any, Dict, Callable, Mapping, List +import logging +import time + +from flask import Flask, Config +from collections import defaultdict + +from arxiv 
def create_app() -> Flask:
    """
    Build and configure the agent's Flask application.

    Loads configuration, installs middleware, initializes the service
    integrations, optionally blocks until upstream services respond, and
    creates the agent's database tables if they do not exist yet.
    """
    app = Flask(__name__)
    app.config.from_object(config)
    # Rebuild ``SQLALCHEMY_BINDS`` whenever the agent database URI is set.
    app.config.add_hook('SUBMISSION_AGENT_DATABASE_URI', update_binds)

    Base(app)

    # The request-logging middleware is always installed; the Vault
    # middleware is placed in front of it when Vault is enabled.
    middleware = [request_logs.ClassicLogsMiddleware]
    if app.config['VAULT_ENABLED']:
        middleware = [vault.middleware.VaultMiddleware] + middleware
    wrap(app, middleware)

    # Make sure that we have all of the secrets that we need to run.
    if app.config['VAULT_ENABLED']:
        vault_middleware = app.middlewares['VaultMiddleware']
        vault_middleware.update_secrets({})

    # Initialize services, in the same order as before.
    for service in (database, mail, Classifier, Compiler, PlainTextService):
        service.init_app(app)
    init_app(app)

    if app.config['WAIT_FOR_SERVICES']:
        time.sleep(app.config['WAIT_ON_STARTUP'])
        upstream = (
            (Classifier, 'CLASSIFIER_STATUS_TIMEOUT'),
            (Compiler, 'COMPILER_STATUS_TIMEOUT'),
            (PlainTextService, 'PLAINTEXT_STATUS_TIMEOUT'),
        )
        with app.app_context():
            wait_for(database)
            for service, timeout_key in upstream:
                wait_for(service.current_session(),
                         timeout=app.config[timeout_key])
        # NOTE: nothing waits on the file manager service here
        # (FILEMANAGER_STATUS_TIMEOUT is not consulted).
        logger.info('All upstream services are available; ready to start')

    with app.app_context():
        tables_present = database.tables_exist()
        if not tables_present:
            database.create_all()
    return app
+fileConfig(config.config_file_name) + +# add your model's MetaData object here +# for 'autogenerate' support +# from myapp import mymodel +# target_metadata = mymodel.Base.metadata +target_metadata = None + +# other values from the config, defined by the needs of env.py, +# can be acquired: +# my_important_option = config.get_main_option("my_important_option") +# ... etc. + + +def run_migrations_offline(): + """Run migrations in 'offline' mode. + + This configures the context with just a URL + and not an Engine, though an Engine is acceptable + here as well. By skipping the Engine creation + we don't even need a DBAPI to be available. + + Calls to context.execute() here emit the given string to the + script output. + + """ + url = config.get_main_option("sqlalchemy.url") + context.configure( + url=url, target_metadata=target_metadata, literal_binds=True + ) + + with context.begin_transaction(): + context.run_migrations() + + +def run_migrations_online(): + """Run migrations in 'online' mode. + + In this scenario we need to create an Engine + and associate a connection with the context. + + """ + connectable = engine_from_config( + config.get_section(config.config_ini_section), + prefix="sqlalchemy.", + poolclass=pool.NullPool, + ) + + with connectable.connect() as connection: + context.configure( + connection=connection, target_metadata=target_metadata + ) + + with context.begin_transaction(): + context.run_migrations() + + +if context.is_offline_mode(): + run_migrations_offline() +else: + run_migrations_online() diff --git a/agent/agent/migrations/script.py.mako b/agent/agent/migrations/script.py.mako new file mode 100644 index 0000000..2c01563 --- /dev/null +++ b/agent/agent/migrations/script.py.mako @@ -0,0 +1,24 @@ +"""${message} + +Revision ID: ${up_revision} +Revises: ${down_revision | comma,n} +Create Date: ${create_date} + +""" +from alembic import op +import sqlalchemy as sa +${imports if imports else ""} + +# revision identifiers, used by Alembic. 
+revision = ${repr(up_revision)} +down_revision = ${repr(down_revision)} +branch_labels = ${repr(branch_labels)} +depends_on = ${repr(depends_on)} + + +def upgrade(): + ${upgrades if upgrades else "pass"} + + +def downgrade(): + ${downgrades if downgrades else "pass"} diff --git a/agent/agent/process/__init__.py b/agent/agent/process/__init__.py new file mode 100644 index 0000000..146470c --- /dev/null +++ b/agent/agent/process/__init__.py @@ -0,0 +1,31 @@ +""" +Processes supported by this application. + +A **process** is a set of one or more related steps that should be carried out +in order, usually focusing on a single submission. Steps are small units of +work with a specific objective, such as getting a resource from a service or +applying a policy. If a step in a process fails, the subsequent steps are not +carried out. Examples of processes include running the autoclassifier and +annotating a submission with the results, and placing submissions on hold when +they exceed size limits. + +Processes are implemented by defining a class that inherits from +:class:`.Process`\. 
class ProcessType(type):
    """
    Metaclass for :class:`.Process`.

    The goal is to preserve the order of steps based on the order in which they
    are defined on a :class:`.Process` subclass.

    Adds a property called ``steps`` to the class, which is a list of instance
    methods that should be called in order to carry out the process. Steps
    inherited from base classes come first, followed by the steps defined on
    the class itself. A step that re-uses the name of an inherited step
    replaces it in place rather than being listed a second time.
    """

    @classmethod
    def __prepare__(mcs, name, bases, **kwargs):
        """Use a :class:`collections.OrderedDict` instead of a ``dict``."""
        return OrderedDict()

    def __new__(mcs, name: str, bases: Tuple[type, ...], attrs: dict,
                **kwargs: Any):
        """Identify the ordered steps in the process."""
        # Keyed by step name so that an override (or a step reachable through
        # more than one base) occupies a single slot. Assigning to an existing
        # key of an OrderedDict keeps its original position.
        ordered = OrderedDict()
        for base in bases:
            for inherited in getattr(base, 'steps', []):
                ordered[inherited.name] = inherited
        for obj in attrs.values():
            if is_step(obj):
                ordered[obj.name] = obj
        attrs['steps'] = list(ordered.values())
        return super().__new__(mcs, name, bases, attrs, **kwargs)


def step(max_retries: Optional[int] = 3,
         delay: Optional[int] = 2,
         backoff: Optional[int] = 2,
         max_delay: Optional[int] = None,
         jitter: Union[int, Tuple[int, int]] = 0) -> Callable:
    """
    Mark an instance method as a step, and configure its retry characteristics.

    The decorator does not wrap the function; it only attaches the values
    below as attributes (plus ``name`` and an ``__is_step__`` marker) for
    whatever runs the process to consult.

    Parameters
    ----------
    max_retries : int
        If provided, the maximum number of times to retry the step.
    delay : int
        Stored as ``delay``. NOTE(review): presumably the initial wait between
        attempts, in seconds -- confirm against the process runner.
    backoff : int
        Stored as ``backoff``. NOTE(review): presumably the multiplier applied
        to the delay after each attempt -- confirm against the process runner.
    max_delay : int
        Stored as ``max_delay``. NOTE(review): presumably an upper bound on
        the delay -- confirm against the process runner.
    jitter : int or tuple
        Stored as ``jitter``. NOTE(review): presumably extra (possibly random,
        if a ``(min, max)`` tuple) time added to the delay -- confirm against
        the process runner.

    Returns
    -------
    callable
        A decorator that returns the same function object, annotated.

    """
    def deco(func: Callable) -> Callable:
        func.__is_step__ = True
        func.name = func.__name__
        func.max_retries = max_retries
        func.delay = delay
        func.backoff = backoff
        func.max_delay = max_delay
        func.jitter = jitter
        return func
    return deco


def is_step(func: Callable) -> bool:
    """Determine whether ``func`` was marked as a step by :func:`.step`."""
    return getattr(func, '__is_step__', None) is True
step=step_name, status=Process.Status.FAILED, + process_id=self.process_id)) + + def on_success(self, step_name: str, trigger: Trigger, + emit: Callable) -> None: + """Emit a success state when a step is completed.""" + emit(AddProcessStatus(creator=self.agent, process=self.name, + step=step_name, + status=self._success_status(step_name), + process_id=self.process_id)) diff --git a/agent/agent/process/classification_and_content.py b/agent/agent/process/classification_and_content.py new file mode 100644 index 0000000..3c9e301 --- /dev/null +++ b/agent/agent/process/classification_and_content.py @@ -0,0 +1,198 @@ +"""Extract text, and get suggestions, features, and flags from Classifier.""" + +from typing import Iterable, Optional, Callable, Tuple +from itertools import count +import time +from datetime import datetime +from pytz import UTC +from contextlib import contextmanager + +from arxiv.taxonomy import CATEGORIES, Category +from arxiv.integration.api import exceptions + +from arxiv.submission import AddClassifierResults, AddContentFlag, AddFeature +from arxiv.submission.domain.flag import Flag, ContentFlag +from arxiv.submission.domain.annotation import Feature +from arxiv.submission.domain.agent import Agent, User +from arxiv.submission.domain.process import ProcessStatus +from arxiv.submission.services import Classifier, PlainTextService +from arxiv.submission.services.plaintext import ExtractionFailed + +from .base import Process, step, Retry, Recoverable +from ..domain import Trigger + + +class PlainTextExtraction(Process): + """Extract plain text from a compiled PDF.""" + + def source_id(self, trigger: Trigger) -> int: + """Get the source ID for the submission content.""" + try: + return trigger.after.source_content.identifier + except AttributeError as exc: + self.fail(exc, 'No source content identifier on post-event state') + + def handle_plaintext_exception(self, exc: Exception) -> None: + """Handle exceptions raised when calling the plain text 
class RunAutoclassifier(PlainTextExtraction):
    """
    Extract plain text and poll the autoclassifier.

    In addition to generating classification suggestions, the current
    implementation of the autoclassifier also generates features (like word
    counts) and content flags (e.g. possible language issues, line numbers).
    """

    CLASSIFIER_FLAGS = {
        '%stop': None,  # We will handle this ourselves.
        'stops': None,  # We will handle this ourselves.
        'language': ContentFlag.Type.LANGUAGE,
        'charset': ContentFlag.Type.CHARACTER_SET,
        'linenos': ContentFlag.Type.LINE_NUMBERS
    }

    def handle_classifier_exception(self, exc: Exception) -> None:
        """
        Handle exceptions raised when calling the classifier service.

        Always raises: :class:`.Recoverable` for bad responses, connection
        failures, and 5xx responses; otherwise fails the process.
        """
        exc_type = type(exc)

        if exc_type in (exceptions.BadResponse, exceptions.ConnectionFailed):
            raise Recoverable('Encountered %s; try again' % exc) from exc
        elif exc_type is exceptions.RequestFailed:
            if exc.status_code >= 500:
                msg = 'Classifier service choked: %i' % exc.status_code
                raise Recoverable(msg) from exc
            self.fail(exc, 'Unrecoverable exception: %i' % exc.status_code)
        self.fail(exc, 'Unhandled exception')

    @step(max_retries=None)
    def call_classifier(self, content: bytes, trigger: Trigger,
                        emit: Callable) -> Tuple:
        """
        Send plain text content to the autoclassifier.

        Returns the classifier's result so that it is handed to the next step
        (:meth:`.process_result`). Previously this step invoked
        :meth:`.process_result` itself and returned ``None``; since
        :meth:`.process_result` is also a step, it was then run a second time
        with ``None`` as its input. It also meant that errors raised while
        emitting events were treated as classifier failures.
        """
        try:
            # The autoclassifier runs synchronously; it's pretty fast.
            return Classifier.classify(content)
        except Exception as exc:
            self.handle_classifier_exception(exc)

    @step()
    def process_result(self, result: Tuple, trigger: Trigger,
                       emit: Callable) -> None:
        """
        Process the results returned by the autoclassifier.

        ``result`` is unpacked as ``(suggestions, flags, counts)``. Emits the
        classifier results, one content flag per recognized flag key, and the
        character/page/stopword/word count and stopword percent features.
        """
        suggestions, flags, counts = result
        results = [{'category': suggestion.category,
                    'probability': suggestion.probability}
                   for suggestion in suggestions]
        emit(AddClassifierResults(creator=self.agent, results=results))

        for flag in flags:
            now = datetime.now(UTC).isoformat()
            comment = "flag from classification succeeded at %s" % now
            # Keys mapped to None (and unknown keys) are not turned into flags.
            flag_type = self.CLASSIFIER_FLAGS.get(flag.key)
            if flag_type is None:
                continue
            emit(AddContentFlag(creator=self.agent, flag_type=flag_type,
                                flag_data=flag.value, comment=comment))

        # Content without any words has no meaningful ratio; report 0.0
        # instead of failing with a ZeroDivisionError after the other events
        # have already been emitted.
        if counts.words:
            stop_percent = counts.stops / counts.words
        else:
            stop_percent = 0.0

        emit(AddFeature(creator=self.agent,
                        feature_type=Feature.Type.CHARACTER_COUNT,
                        feature_value=counts.chars))
        emit(AddFeature(creator=self.agent,
                        feature_type=Feature.Type.PAGE_COUNT,
                        feature_value=counts.pages))
        emit(AddFeature(creator=self.agent,
                        feature_type=Feature.Type.STOPWORD_COUNT,
                        feature_value=counts.stops))
        emit(AddFeature(creator=self.agent,
                        feature_type=Feature.Type.WORD_COUNT,
                        feature_value=counts.words))
        emit(AddFeature(creator=self.agent,
                        feature_type=Feature.Type.STOPWORD_PERCENT,
                        feature_value=stop_percent))
class CheckStopwordCount(Process):
    """Check the submission content for too low stopword count."""

    @step()
    def check_stop_count(self, previous: Optional, trigger: Trigger,
                         emit: Callable) -> None:
        """
        Flag the submission if the number of stopwords is too low.

        Fails the process if the post-event submission state carries no
        stopword count feature. The threshold is read from the ``LOW_STOP``
        trigger parameter.
        """
        feats = [feature for feature in trigger.after.features.values()
                 if feature.feature_type is Feature.Type.STOPWORD_COUNT]
        if not feats:
            self.fail(message='No stopword count feature on submission')

        # TODO: we are assuming that there is only one. Is that ever not true?
        stop_count = feats[0].feature_value
        if stop_count < trigger.params['LOW_STOP']:
            # Report the value that was actually compared, rather than
            # reading ``feature_value`` from the triggering event, which need
            # not carry that attribute (or refer to this feature).
            emit(AddContentFlag(
                creator=self.agent,
                flag_type=ContentFlag.Type.LOW_STOP,
                flag_data=stop_count,
                comment="Classifier reports low stops or %stops"
            ))
+ def send(self, previous: Optional, trigger: Trigger, + emit: Callable) -> None: + """Send the e-mail.""" + try: + submission_id = trigger.after.submission_id + recipient = trigger.event.creator + except AttributeError as exc: + logger.error('Missing event or post-event submission state') + self.fail(exc, 'Missing event or post-event submission state') + + context = { + 'submission_id': submission_id, + 'submission': trigger.after, + 'arxiv_id': f'submit/{submission_id}', + 'announce_time': + schedule.next_announcement_time(trigger.after.submitted), + 'freeze_time': schedule.next_freeze_time(trigger.after.submitted), + } + logger.info('Sending confirmation email to %s for submission %i', + recipient.email, submission_id) + mail.send(recipient.email, + "Submission to arXiv received", + render_template("submission-core/confirmation-email.txt", + **context), + render_template("submission-core/confirmation-email.html", + **context)) diff --git a/agent/agent/process/metadata_checks.py b/agent/agent/process/metadata_checks.py new file mode 100644 index 0000000..3b7a8dd --- /dev/null +++ b/agent/agent/process/metadata_checks.py @@ -0,0 +1,216 @@ +"""Automated metadata checks.""" + +from datetime import datetime, timedelta +from typing import Set, List, Tuple, Iterable, Optional, Callable +from unidecode import unidecode +import string +from functools import lru_cache as memoize + +from arxiv.base.globals import get_application_config + +from arxiv.submission.domain.event import Event, SetTitle, SetAbstract, \ + RemoveFlag, AddMetadataFlag +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.agent import Agent, User +from arxiv.submission.domain.flag import MetadataFlag, ContentFlag, \ + PossibleDuplicate +from arxiv.submission.services import classic +from .util import is_ascii, below_ascii_threshold, proportion_ascii + +from ..process import Process, step +from ..domain import Trigger + +STOPWORDS = 
# Original procedure from classic:
#
# Query Submission (``arXiv_submissions`` table) for submissions with titles
# that were created within the last 3 months.
#
# Select previous matches for this submission id, and delete them -- from
# SubmissionNearDuplicates (``arXiv_submission_near_duplicates`` table)
#
# Get a Jaccard similarity indexer function
# (arXiv::Submit::Jaccard::JaccardIndex->make_jaccard_indexer)
#
# Among the results of the arXiv_submissions query, find submissions that are
# more similar than some threshold, skipping any user-deleted submissions.
#
# For each match > threshold, add a new duplicate record to the
# ``arXiv_submission_near_duplicates`` table with its score, and create
# corresponding entries in the admin log (``arXiv_admin_log`` table).
class CheckForSimilarTitles(Process):
    """
    Check for other submissions with very similar titles.

    Ask classic for titles of papers submitted within the last several months.
    Add an annotation to the submission if a title is more similar to the
    current submission's title than a configurable threshold.
    """

    def _get_title(self, trigger: Trigger) -> str:
        """Get the title from the post-event state, or fail the process."""
        try:
            return trigger.after.metadata.title
        except AttributeError as exc:
            self.fail(exc, 'Missing title or post-event state')

    @step(max_retries=None)
    def get_candidates(self, previous: Optional, trigger: Trigger,
                       emit: Callable) -> List[Tuple[int, str, Agent]]:
        """
        Get candidate titles from the database.

        Returns an empty list when the submission has no title, and fails the
        process when the title contains no usable tokens.
        """
        title = self._get_title(trigger)
        if not title:    # Nothing to do.
            return []
        # If the title has no tokens, there is nothing to do.
        if not tokenized(title):
            self.fail(message='No usable tokens in title')

        # Get the time window for possible duplicate submissions.
        since = window(trigger.params['TITLE_SIMILARITY_WINDOW'])
        candidates: List[Tuple[int, str, Agent]] = classic.get_titles(since)
        return candidates

    @step()
    def check_for_duplicates(self, candidates: List[Tuple[int, str, Agent]],
                             trigger: Trigger, emit: Callable) -> None:
        """
        Look for very similar titles, and add flags if appropriate.

        Existing possible-duplicate-title flags are removed first; a new flag
        is then added for each candidate whose similarity to the current
        title exceeds ``TITLE_SIMILARITY_THRESHOLD``.
        """
        title = self._get_title(trigger)
        if not title:    # Nothing to do.
            return
        flag_type = MetadataFlag.Type.POSSIBLE_DUPLICATE_TITLE

        for flag_id, flag in trigger.after.flags.items():
            if isinstance(flag, MetadataFlag) and flag.flag_type is flag_type:
                emit(RemoveFlag(creator=self.agent, flag_id=flag_id))

        threshold = trigger.params['TITLE_SIMILARITY_THRESHOLD']
        for ident, candidate_title, submitter in candidates:
            if not candidate_title:    # Nothing to do.
                continue
            similarity = jaccard(title, candidate_title)
            if similarity > threshold:
                # The flag describes the *other* submission: its ID, owner,
                # and title (not the title of the submission being checked).
                emit(AddMetadataFlag(
                    creator=self.agent,
                    flag_type=flag_type,
                    flag_data={'submission_id': ident,
                               'title': candidate_title,
                               'owner': submitter,
                               'similarity': similarity},
                    field='title',
                    comment='possible duplicate title'))
+ """ + + def _get_title(self, trigger: Trigger) -> str: + try: + if trigger.after.metadata.title is None: + self.fail(message='Missing title or post-event state') + return trigger.after.metadata.title + except AttributeError as exc: + self.fail(exc, 'Missing title or post-event state') + + def _clear_previous_flags(self, trigger: Trigger, emit: Callable) -> None: + for flag_id, flag in trigger.after.flags.items(): + if isinstance(flag, MetadataFlag) and \ + flag.flag_type is MetadataFlag.Type.CHARACTER_SET and \ + flag.field == 'title': + emit(RemoveFlag(creator=self.agent, flag_id=flag_id)) + + @step() + def check_title(self, previous: Optional, trigger: Trigger, + emit: Callable) -> None: + """Check title for low ASCII content.""" + self._clear_previous_flags(trigger, emit) + level = proportion_ascii(self._get_title(trigger)) + if level < trigger.params['METADATA_ASCII_THRESHOLD']: + comment = 'Possible excessive use of non-ASCII characters.' + emit(AddMetadataFlag(creator=self.agent, + flag_type=MetadataFlag.Type.CHARACTER_SET, + flag_data={'ascii': level}, + field='title', + comment=comment)) + + +# @SetAbstract.bind(condition=lambda *a: not is_system_event(*a)) +class CheckAbstractForUnicodeAbuse(Process): + """ + Screen for possible abuse of unicode in abstracts. + + We support unicode characters in abstracts, but this can get out of hand. + This rule adds a flag if the ratio of non-ASCII to ASCII characters + is too high. 
def jaccard(phrase_a: str, phrase_b: str) -> float:
    """
    Calculate the Jaccard similarity of two phrases.

    This is the number of tokens shared by the two phrases divided by the
    number of distinct tokens across both.

    Returns
    -------
    float
        A value between 0 and 1. If neither phrase yields any tokens (e.g.
        both consist only of stopwords or punctuation) there is nothing to
        compare, and ``0.0`` is returned rather than dividing by zero.

    """
    total = union(phrase_a, phrase_b)
    if total == 0:
        return 0.0
    return intersection(phrase_a, phrase_b) / total
+ def _skip(self, trigger: Trigger) -> bool: + """Determine whether to skip proposal-making altogether.""" + user_primary = trigger.after.primary_classification.category + skipped_categories = trigger.params['NO_RECLASSIFY_CATEGORIES'] + skipped_archives = trigger.params['NO_RECLASSIFY_ARCHIVES'] + return user_primary in skipped_categories \ + or self._get_archive(user_primary) in skipped_archives + + def _user_category_ranks_highly(self, trigger: Trigger) -> bool: + results = self._get_results(trigger) + user_primary = trigger.after.primary_classification.category + probs = {r['category']: r['probability'] for r in results} + return user_primary in probs and probs[user_primary] >= 0.5 + + def _find_candidate(self, trigger: Trigger) -> Optional[Category]: + proposal_threshold = trigger.params['RECLASSIFY_PROPOSAL_THRESHOLD'] + within: Optional[ClassifierResult] = None + without: Optional[ClassifierResult] = None + + user_primary = trigger.after.primary_classification.category + for result in self._get_results(trigger): + probability = result['probability'] + if self._in_the_same_archive(result['category'], user_primary): + if within is None or probability > within['probability']: + within = result + elif without is None or probability > without['probability']: + without = result + + if within and within['probability'] >= proposal_threshold: + return within['category'] + elif without and without['probability'] >= proposal_threshold: + return without['category'] + return None + + @step() + def propose_primary(self, previous: Optional, trigger: Trigger, + emit: Callable) -> None: + """Propose a new primary classification, if appropriate.""" + results = self._get_results(trigger) + if len(results) == 0: # Nothing to do. 
+ return + + if self._skip(trigger): + return + + user_primary = trigger.after.primary_classification.category + + # if the primary is not in the suggestions, or the primary has + # probability < 0.5 (logodds < 0) and there is an alternative, propose + # the alternatve (preference for within-archive). otherwise make no + # proposal + if self._user_category_ranks_highly(trigger): + return + + # the best alternative is the suggestion with the highest probability + # above 0.57 (logodds = 0.3); there may be a best alternative inside or + # outside of the selected primary archive, or both. + suggested_category = self._find_candidate(trigger) + if suggested_category is None: + return + + probs = probs = {r['category']: r['probability'] for r in results} + comment = f"selected primary {user_primary}" + if user_primary not in probs: + comment += " not found in classifier scores" + else: + comment += f" has probability {round(probs[user_primary], 3)}" + emit(AddProposal(creator=self.agent, + proposed_event_type=SetPrimaryClassification, + proposed_event_data={'category': suggested_category}, + comment=comment)) + + +class ProposeCrossListFromPrimaryCategory(Process): + """Propose a cross-list classification based on primary classification.""" + + @step() + def propose(self, previous: Optional, trigger: Trigger, + emit: Callable) -> None: + """Make the proposal.""" + try: + user_primary = trigger.after.primary_classification.category + secondary_categories = trigger.after.secondary_categories + except AttributeError: + self.fail(message='Missing primary, secondary, or postevent state') + category_map = trigger.params['AUTO_CROSS_FOR_PRIMARY'] + suggested = category_map.get(user_primary, None) + if suggested and suggested not in secondary_categories: + emit(AddProposal(creator=self.agent, + proposed_event_type=AddSecondaryClassification, + proposed_event_data={'category': suggested}, + comment=f"{user_primary} is primary")) + + +class AcceptSystemCrossListProposals(Process): 
+ """ + Accept any cross-list proposals generated by the system. + + This is a bit odd, since we likely generated the proposal in this very + thread...but this seems to be an explicit feature of the classic system. + """ + + @step() + def accept(self, previous: Optional, trigger: Trigger, + emit: Callable) -> None: + """Accept pending system proposals for cross-list classification.""" + try: + proposals = trigger.after.proposals.items() + except AttributeError as exc: + self.fail(exc, 'Missing proposals or post-event state') + + for event_id, proposal in proposals: + if proposal.proposed_event_type is not AddSecondaryClassification: + continue + if proposal.status != Proposal.Status.PENDING: + continue + if type(proposal.creator) is System: + comment = "accept cross-list proposal from system" + emit(AcceptProposal(creator=self.agent, + proposal_id=event_id, + comment=comment)) diff --git a/agent/agent/process/size_limits.py b/agent/agent/process/size_limits.py new file mode 100644 index 0000000..49455e5 --- /dev/null +++ b/agent/agent/process/size_limits.py @@ -0,0 +1,126 @@ +"""Enforces size limit rules.""" + +from typing import Iterable, Union, Optional, Callable + +from arxiv.integration.api import exceptions + +from arxiv.submission.domain.event import Event, SetUploadPackage, \ + UpdateUploadPackage, AddHold, RemoveHold, AddProcessStatus +from arxiv.submission.domain.event.base import Condition +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + Hold, Compilation +from arxiv.submission.domain.flag import Flag, ContentFlag +from arxiv.submission.domain.annotation import Feature +from arxiv.submission.domain.agent import Agent, User +from arxiv.submission.domain.process import ProcessStatus +from arxiv.submission.services import plaintext, compiler +from arxiv.submission.auth import get_system_token, get_compiler_scopes + +from arxiv.taxonomy import CATEGORIES, Category +from ..process import Process, step, Recoverable +from 
..domain import Trigger
+
+PackageEvent = Union[SetUploadPackage, UpdateUploadPackage]
+
+
+class CheckSubmissionSourceSize(Process):
+    """When a new source package is attached, check for oversize source."""
+
+    @step()
+    def check(self, previous: Optional, trigger: Trigger,
+              emit: Callable) -> None:
+        """Perform the source size check procedure."""
+        uncompressed_max = trigger.params['UNCOMPRESSED_PACKAGE_MAX_BYTES']
+        compressed_max = trigger.params['COMPRESSED_PACKAGE_MAX_BYTES']
+        try:
+            uncompressed_size = trigger.after.source_content.uncompressed_size
+            compressed_size = trigger.after.source_content.compressed_size
+        except AttributeError as exc:
+            self.fail(exc, 'Missing source content or post-event state')
+
+        msg = f"{uncompressed_size} bytes; {compressed_size} bytes compressed"
+
+        if uncompressed_size > uncompressed_max \
+                or compressed_size > compressed_max:
+            # If the submission is already on hold for this reason, there is
+            # nothing left to do.
+            if Hold.Type.SOURCE_OVERSIZE in trigger.after.hold_types:
+                return
+
+            emit(AddHold(creator=self.agent,
+                         hold_type=Hold.Type.SOURCE_OVERSIZE,
+                         hold_reason=msg))
+
+        # If the submission is on hold due to oversize, and the submission is
+        # no longer oversize, remove the hold.
+        else:
+            for event_id, hold in trigger.after.holds.items():
+                if hold.hold_type is Hold.Type.SOURCE_OVERSIZE:
+                    emit(RemoveHold(creator=self.agent, hold_event_id=event_id,
+                                    hold_type=Hold.Type.SOURCE_OVERSIZE,
+                                    removal_reason=msg))
+
+
+class CheckPDFSize(Process):
+    """When a PDF is compiled, check for oversize."""
+
+    def handle_compiler_exception(self, exc: Exception) -> None:
+        """Handle exceptions raised when calling the compiler service."""
+        exc_type = type(exc)
+
+        if exc_type in (exceptions.BadResponse, exceptions.ConnectionFailed):
+            raise Recoverable('Encountered %s; try again' % exc) from exc
+        elif exc_type is exceptions.RequestFailed:
+            if exc.status_code >= 500:
+                msg = 'Compiler service choked: %i' % exc.status_code
+                raise Recoverable(msg) from exc
+            self.fail(exc, 'Unrecoverable exception: %i' % exc.status_code)
+        self.fail(exc, 'Unhandled exception')
+
+    @step(max_retries=None)
+    def get_size(self, previous: Optional, trigger: Trigger,
+                 emit: Callable) -> int:
+        """Get the size of the compilation from the compiler service."""
+        try:
+            source_id = trigger.after.source_content.identifier
+            source_state = trigger.after.source_content.checksum
+        except AttributeError as exc:
+            self.fail(exc, message='Missing compilation or post-event state')
+        compilation_id = Compilation.get_identifier(source_id, source_state)
+        scopes = get_compiler_scopes(compilation_id)
+        token = get_system_token(__name__, self.agent, scopes)
+
+        try:
+            stat = compiler.Compiler.get_status(source_id, source_state, token,
+                                                Compilation.Format.PDF)
+        except Exception as exc:
+            self.handle_compiler_exception(exc)
+        if stat.status is Compilation.Status.IN_PROGRESS:
+            raise Recoverable('Compilation is still in progress; try again')
+        elif stat.status is Compilation.Status.FAILED:
+            self.fail(message='Compilation failed; cannot get size of PDF')
+        return stat.size_bytes
+
+    @step()
+    def evaluate_size(self, size_bytes: int, trigger: Trigger,
+                      emit: Callable) -> None:
+        """Add or remove a PDF_OVERSIZE hold, based on PDF_LIMIT_BYTES."""
+        pdf_limit_bytes = trigger.params['PDF_LIMIT_BYTES']
+        if size_bytes > pdf_limit_bytes:
+            if Hold.Type.PDF_OVERSIZE in trigger.after.hold_types:
+                return  # Already on hold for this reason; nothing to do.
+            emit(AddHold(
+                creator=self.agent,
+                hold_type=Hold.Type.PDF_OVERSIZE,
+                hold_reason=f'PDF is {size_bytes} bytes, exceeds limit of'
+                            f' {pdf_limit_bytes} bytes'))
+        else:
+            # If the submission is on hold due to oversize, remove the hold.
+            for event_id, hold in trigger.after.holds.items():
+                if hold.hold_type is Hold.Type.PDF_OVERSIZE:
+                    emit(RemoveHold(
+                        creator=self.agent,
+                        hold_event_id=event_id,
+                        hold_type=Hold.Type.PDF_OVERSIZE,
+                        removal_reason=f'PDF is {size_bytes} bytes, below'
+                                       f' limit of {pdf_limit_bytes} bytes'))
diff --git a/docs/.nojekyll b/agent/agent/process/tests/__init__.py
similarity index 100%
rename from docs/.nojekyll
rename to agent/agent/process/tests/__init__.py
diff --git a/agent/agent/process/tests/data/__init__.py b/agent/agent/process/tests/data/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/agent/agent/process/tests/data/titles.py b/agent/agent/process/tests/data/titles.py
new file mode 100644
index 0000000..757f679
--- /dev/null
+++ b/agent/agent/process/tests/data/titles.py
@@ -0,0 +1,153 @@
+from arxiv.submission.domain.agent import User
+
+TITLES = [
+    (2344371, 'Maximally Rotating Supermassive Stars at the Onset of Collapse: The Perturbative Effects of Gas Pressure, Magnetic Fields, Dark Matter and Dark Energy', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])),
+    (2344372, 'Deep learning approach for Fourier ptychography microscopy', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])),
+    (2344378, 'Implementing nonlinear Compton scattering beyond the local constant field approximation', User(native_id=12345,
email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344394, 'The role of gravity in the pair creation induced by electric fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344409, 'An analysis of high-frequency cryptocurrencies prices dynamics using permutation-information-theory quantifiers', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344417, 'Floer theory and flips', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344461, 'An Effect of The Radiofrequency Fields Over Saccharomyces Cerevisiae', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344489, 'On the field of moduli of superelliptic curves', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344491, 'Toward an Optimal Quantum Algorithm for Polynomial Factorization over Finite Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344506, 'Are 10 EeV cosmic rays extragalactic? 
Theory of cosmic ray diffusion at high energy', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344557, 'C-metric solution for conformal gravity with a conformally coupled scalar field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344572, 'On the theory of high-Tc superconductivity of doped cuprates', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344583, 'Controlling spin-orbit interactions in silicon quantum dots using magnetic field direction', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344585, 'An ostentatious model of cosmological scalar-tensor theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344593, 'Measurements and atomistic theory of electron $g$ factor anisotropy for phosphorus donors in strained silicon', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344618, 'Sending or not sending: twin-field quantum key distribution with large misalignment error', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344662, 'Density Estimation with Contaminated Data: Minimax Rates and Theory of Adaptation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344670, 'Non-linear charge oscillation driven by single-cycle light-field in an organic superconductor', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', 
endorsements=[])), + (2344743, 'Toward a quantitative theory of the hydrodynamic limit', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344768, 'The Needle in the Haystack for Theory of High Temperature Superconductivity', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344792, 'A transition between bouncing hyper-inflation to {\\Lambda}CDM from diffusive scalar fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344794, 'The quantum theory of a closed string', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344796, 'Probing Aqueous Electrolytes with Fourier Spectrum Pulse-Echo Technique', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344798, 'Confronting nuclear equation of state in the presence of dark matter using GW170817 observation in relativistic mean field theory approach', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344873, 'Dynamically probing strongly-coupled field theories with critical point', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344929, 'Galactic Magnetic Field Reconstruction I. Constraints from polarized diffuse emission: Methodology and simulations', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344950, 'Galactic Magnetic Field Reconstruction II. 
Constraints from polarized thermal dust sky as seen by $Planck$', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2344970, 'Theory of Coulomb Drag in Spatially Inhomogeneous Materials', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345010, 'Efficient Gauss-Newton-Krylov momentum conservation constrained PDE-LDDMM using the band-limited vector field parameterization', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345014, 'Nonrelativistic String Theory and T-Duality', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345067, 'Decay of a Thermofield-Double State in Chaotic Quantum Systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345109, 'Convex Optimization Based Bit Allocation for Light Field Compression under Weighting and Consistency Constraints', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345110, 'Smoothness of correlation functions in Liouville Conformal Field Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345129, 'Towards a directed homotopy type theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345148, 'Collective Coordinate Descriptions of Magnetic Domain Wall Motion in Perpendicularly Magnetized Nanostructures under the Application of In-plane Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', 
identifier=None, affiliation='', endorsements=[])), + (2345150, 'Collective Coordinate Descriptions of Magnetic Domain Wall Motion in Perpendicularly Magnetized Nanostructures under the Application of In-plane Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345209, 'Construction of MDS Self-dual Codes over Finite Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345229, 'Theta and eta polynomials in geometry, Lie theory, and combinatorics', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345238, 'Weak Cosmic Censorship Conjecture in Kerr-(Anti-)de Sitter Black Hole with Scalar Field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345248, 'Contramodules over pro-perfect topological rings, the covering property in categorical tilting theory, and homological ring epimorphisms', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345299, 'Learnable: Theory vs Applications', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345317, 'The importance of scalar fields as extradimensional metric components in Kaluza-Klein models', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345322, 'Theory of Single Susceptibility for Near-field Optics Equally Associated with Scalar and Vector Potentials', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345324, 'Spatially Inhomogeneous 
Population Dynamics: Beyond the Mean Field Approximation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345404, 'Analytical treatment of the wakefields driven by transversely shaped beams in a planar slow-wave structure', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345420, 'On s-injective and injective ray transforms of tensor fields on surfaces', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345425, 'Solar system science with the Wide-Field InfraRed Survey Telescope (WFIRST)', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345439, 'On the use of machine learning algorithms in the measurement of stellar magnetic fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345446, 'Circuit theory in projective space and homogeneous circuit models', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345447, 'Logical Fallacy of using the Electric Field in Non-resonant Near-field Optics', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345450, 'Generalized Lennard-Jones Potentials, SUSYQM and Differential Galois Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345475, 'A general framework for SPDE-based stationary random fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345482, 
'Towards the Theory of the Yukawa Potential', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345521, 'One-Shot Optimal Topology Generation through Theory-Driven Machine Learning', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345617, 'In situ Electric Field Skyrmion Creation in Magnetoelectric Cu$_2$OSeO$_3$', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345619, 'Convergence With Probability 1 in the Method of Expansion of Multiple Ito Stochastic Integrals, Based on Multiple Fourier-Legendre Series', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345690, 'Theory of cavity-modified ground-state chemical reactivity', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345748, 'Estimates on Functional Integrals of Quantum Mechanics and Non-Relativistic Quantum Field Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345750, 'Construction of general symplectic field theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345758, 'A non-perturbative field theory approach for the Kondo effect: Emergence of an extra dimension and its implication for the holographic duality conjecture', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345760, 'Visible -Light-Gated Reconfigurable Rotation of Nanomotors in Electric Fields', User(native_id=12345, email='foo@baz.com', forename='', 
surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345761, 'Energy condition respecting warp drives: The role of spin in Einstein-Cartan theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345856, 'A user model for JND-based video quality assessment: theory and applications', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345936, 'Exact Embeddings of JT Gravity in Strings and M-theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2345951, 'Exact Embeddings of JT Gravity in Strings and M-theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346034, 'Observation of Light Guiding by Artificial Gauge Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346060, 'Improved Fourier restriction estimates in higher dimensions', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346081, 'Calculation of the Cherenkov fields in the cross-section of a short relativistic bunch', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346157, 'Neutrino propagation in an electron background with an inhomogeneous magnetic field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346210, 'Learning Rigidity in Dynamic Scenes with a Moving Camera for 3D Motion Field Estimation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, 
affiliation='', endorsements=[])), + (2346217, 'Renormalization of Einstein-Maxwell theory at one-loop', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346230, 'Hodge Decomposition of the wall shear stress vector fields characterizing biological flows', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346248, 'SKIFFS: Superconducting Kinetic Inductance Field-Frequency Sensors for Sensitive Magnetometry in Moderate Background Magnetic Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346291, 'Estimating Failure in Brittle Materials using Graph Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346354, 'A new sum-product estimate in prime fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346434, 'Unsupervised Domain Adaptive Re-Identification: Theory and Practice', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346490, 'Levy Differential Operators and Gauge Invariant Equations for Dirac and Higgs Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346501, 'SDSS IV MaNGA: Characterizing Non-Axisymmetric Motions in Galaxy Velocity Fields Using the Radon Transform', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346524, 'Magnetic exchange and susceptibilities in fcc iron: A supercell dynamical mean-field theory study', User(native_id=12345, email='foo@baz.com', 
forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346558, 'The Vlasov-Navier-Stokes equations as a mean field limit', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346567, 'A Behavioural Theory for Interactions in Collective-Adaptive Systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346570, 'On growth of the set $A(A+1)$ in arbitrary finite fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346603, 'Contributions to Four-Position Theory with Relative Rotations', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346617, 'The Theory of Bonds II: Closed 6R Linkages with Maximal Genus', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346624, 'The Theory of Bonds: A New Method for the Analysis of Linkages', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346629, 'The observed galaxy bispectrum from single-field inflation in the squeezed limit', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346652, 'A Brief History of Algebra with a Focus on the Distributive Law and Semiring Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346682, 'The Theory of Inflation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346692, 'Gapped Boundary Theory of 
the Twisted Gauge Theory Model of Three-Dimensional Topological Orders', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346704, 'The Proca Field in Curved Spacetimes and its Zero Mass Limit', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346808, 'Krasovskii-Subbotin approach to mean field type differential games', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346823, 'Free field structure of the model with a spin-$\\frac{3}{2}$ Rarita-Schwinger field directly coupled to a spin-$\\frac{1}{2}$ field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346837, 'Percolation for level-sets of Gaussian free fields on metric graphs', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346856, 'Chern class and Riemann-Roch theorem for cohomology theory without homotopy invariance', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346881, 'Quantum and Classical Lyapunov Exponents in Atom-Field Interaction Systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346903, 'Mean Field Equilibria for Resource Competition in Spatial Settings', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2346916, 'Anisotropic functional Fourier deconvolution with long-memory dependent errors: a minimax study', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, 
affiliation='', endorsements=[])), + (2346973, 'A Simple Model for Non-Abelian T-duality and Double Field Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347084, 'Technical design and commissioning of the sensor net for fine meshed measuring of magnetic field at KATRIN Spectrometer', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347121, 'Learning Rigidity in Dynamic Scenes with a Moving Camera for 3D Motion Field Estimation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347137, 'Learning Rigidity in Dynamic Scenes with a Moving Camera for 3D Motion Field Estimation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347156, 'Learning Rigidity in Dynamic Scenes with a Moving Camera for 3D Motion Field Estimation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347195, 'On Some Topological Properties of Fourier Transforms of Regular Holonomic D-Modules', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347214, 'Equivariant Morita-Takeuchi Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347215, 'Intrinsic structural and electronic properties of the Buffer Layer on Silicon Carbide unraveled by Density Functional Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347270, 'Serre-Tate theory for Calabi-Yau varieties', User(native_id=12345, email='foo@baz.com', 
forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347280, 'Differential Weil Descent and Differentially Large Fields', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347286, 'Curvature correction to the field emission current', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347287, 'Quench dynamics of the Ising field theory in a magnetic field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347309, 'Quench dynamics of the Ising field theory in a magnetic field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347321, 'Baryons under Strong Magnetic Fields or in Theories with Space-dependent $\\theta$-term', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347331, 'Implementing nonlinear Compton scattering beyond the local constant field approximation', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347362, 'Fractal AI: A fragile theory of intelligence', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347403, 'Direct 3D Tomographic Reconstruction and Phase-Retrieval of Far-Field Coherent Diffraction Patterns', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347415, 'Strong field QED in lepton colliders and electron/laser interactions', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, 
affiliation='', endorsements=[])), + (2347422, 'Strong field QED in lepton colliders and electron/laser interactions', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347424, 'Topological and Geometric Universal Thermodynamics in Conformal Field Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347430, 'Experimental signatures of the quantum nature of radiation reaction in the field of an ultra-intense laser', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347441, 'Incompatible deformation field and Riemann curvature tensor', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347442, 'The Electromagnetic Field and Radiation Reaction Force for Point Charged Particle with Magnetic Moment', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347454, 'Visibility of Shafarevich-Tate group of abelian varieties over number field extensions', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347487, 'Thermodynamic laws for populations and quantum coherence: A self-contained introduction to the resource theory approach to thermodynamics', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347499, 'Dense Light Field Reconstruction From Sparse Sampling Using Residual Network', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347504, 'A Survey on the Theory of Bonds', User(native_id=12345, 
email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347506, 'Gradient flow approach to local mean-field spin systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347534, 'Recovering P(X) from a canonical complex field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347535, 'Energy transfer from space-time into matter and a bouncing inflation from Covariant Canonical Gauge theory of Gravity', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347552, 'Embedding, simulation and consistency of $\\cal PT$ -symmetric quantum Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347571, 'Geometric Langlands Twists of N = 4 Gauge Theory from Derived Algebraic Geometry', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347583, 'Back Reaction of 4D Conformal Fields on Static Black-Hole Geometry', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347587, 'Hopfield Network based Control and Diagnostics System for Accelerators', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347590, 'Gradient flow approach to local mean-field spin systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347594, 'Gradient flow approach to local mean-field spin systems', User(native_id=12345, email='foo@baz.com', forename='', surname='', 
suffix='', identifier=None, affiliation='', endorsements=[])), + (2347618, 'RF amplification property of the MgO-based magnetic tunnel junction using field-induced ferromagnetic resonance', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347621, 'Spintronic Oscillator Based on Magnetic Field Feedback', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347645, 'A resource theory of entanglement with a unique multipartite maximally entangled state', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347648, 'Pluripotential Theory and Convex Bodies: Large Deviation Principle', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347652, 'The Decoupling of $\\bar\\Omega$ in String Theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347663, 'Probing shear-induced rearrangements in Fourier Space. II. 
Differential Dynamic Microscopy', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347694, 'Partial Fourier--Mukai transform for integrable systems with applications to Hitchin fibration', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347704, 'A theory of single-shot error correction for adversarial noise', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347757, 'A $\\mathrm{U}(2) \\times \\mathrm{U}(3)$ gauge theory extension of the standard model', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347759, 'Who needs category theory?', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347777, 'Combinatorial aspects of the Legendre and Fourier transforms in perturbative quantum field theory', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347794, 'Hopf Galois structures on separable field extensions of odd prime power degree', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347795, 'Initial conditions for nuclear collisions: theory overview', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347836, 'Superconductor in a weak static gravitational field', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347863, 'Strongly Dependent Ordered Abelian Groups and Henselian Fields', User(native_id=12345, 
email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])), + (2347886, 'On the variability of the solar mean magnetic field: contributions from various magnetic features on the surface of the Sun', User(native_id=12345, email='foo@baz.com', forename='', surname='', suffix='', identifier=None, affiliation='', endorsements=[])) +] diff --git a/agent/agent/process/tests/test_classification_and_content.py b/agent/agent/process/tests/test_classification_and_content.py new file mode 100644 index 0000000..eeed7a2 --- /dev/null +++ b/agent/agent/process/tests/test_classification_and_content.py @@ -0,0 +1,554 @@ +"""Tests for classification and content processing rules.""" + +from unittest import TestCase, mock +import copy +from datetime import datetime +from pytz import UTC +from arxiv.integration.api import status, exceptions + +from arxiv.submission.domain.event import ConfirmPreview, AddProcessStatus, \ + AddContentFlag, AddClassifierResults, AddFeature +from arxiv.submission.domain.agent import User, System +from arxiv.submission.domain.submission import Submission, SubmissionContent +from arxiv.submission.domain.process import ProcessStatus +from arxiv.submission.domain.flag import ContentFlag +from arxiv.submission.domain.annotation import Feature +from arxiv.submission.services import plaintext, classifier + +from .. import Failed, Recoverable +from .. import PlainTextExtraction, RunAutoclassifier, CheckStopwordCount, \ + CheckStopwordPercent +from .. 
import classification_and_content as c_and_c +from ...domain import Trigger +from ...runner import ProcessRunner +from ...factory import create_app +from .util import raise_http_exception + +sys = System(__name__) + + + +class TestRequestPlainTextContentExtraction(TestCase): + """Test :func:`PlainTextExtraction.start_extraction`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = ConfirmPreview(creator=self.creator) + self.process = PlainTextExtraction(self.submission.submission_id) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_start_extraction(self, mock_plaintext): + """We attempt to start plain text extraction.""" + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + res = self.process.start_extraction(None, trigger, events.append) + + self.assertIsNone(res, 'No result is returned.') + self.assertEqual(mock_plaintext.request_extraction.call_args[0][0], + self.submission.source_content.identifier, + 'Request for extraction is made with source ID.') + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_missing_source(self, mock_plaintext): + """There is no source on the submission.""" + submission_without_source = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC) + ) + trigger = Trigger(event=self.event, actor=self.creator, + before=submission_without_source, + after=submission_without_source) + events = [] + with self.app.app_context(): + with 
self.assertRaises(Failed): + # Insufficient information to start extraction. + self.process.start_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_bad_response(self, mock_plaintext): + """The plain text service responds oddly which we hope is transient.""" + mock_plaintext.request_extraction.side_effect = \ + raise_http_exception(exceptions.BadResponse, 200) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.start_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_connection_failed(self, mock_plaintext): + """Cannot conntect to plain text service.""" + mock_plaintext.request_extraction.side_effect = \ + raise_http_exception(exceptions.ConnectionFailed, -1) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.start_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_bad_request(self, mock_plaintext): + """The request to the plain text service is malformed.""" + mock_plaintext.request_extraction.side_effect = \ + raise_http_exception(exceptions.BadRequest, 400) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Failed): + # The process is explicitly failed. 
+ self.process.start_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_internal_server_error(self, mock_plaintext): + """The plain text service is down.""" + mock_plaintext.request_extraction.side_effect = \ + raise_http_exception(exceptions.RequestFailed, 500) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The process is explicitly failed. + self.process.start_extraction(None, trigger, events.append) + + +class TestPollPlainTextContentExtraction(TestCase): + """Test :func:`PlainTextExtraction.start_extraction`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = ConfirmPreview(creator=self.creator) + self.process = PlainTextExtraction(self.submission.submission_id) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_poll_extraction(self, mock_plaintext): + """Check the status of the extraction.""" + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + res = self.process.poll_extraction(None, trigger, events.append) + + self.assertIsNone(res, 'No result is returned.') + self.assertEqual(mock_plaintext.extraction_is_complete.call_args[0][0], + self.submission.source_content.identifier, + 'Poll is made with source ID.') + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_poll_bad_response(self, mock_plaintext): + 
"""The plain text service responds oddly which we hope is transient.""" + mock_plaintext.extraction_is_complete.side_effect = \ + raise_http_exception(exceptions.BadResponse, 200) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.poll_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_poll_connection_failed(self, mock_plaintext): + """Cannot conntect to plain text service.""" + mock_plaintext.extraction_is_complete.side_effect = \ + raise_http_exception(exceptions.ConnectionFailed, -1) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.poll_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_poll_bad_request(self, mock_plaintext): + """The request to the plain text service is malformed.""" + mock_plaintext.extraction_is_complete.side_effect = \ + raise_http_exception(exceptions.BadRequest, 400) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Failed): + # The process is explicitly failed. 
+ self.process.poll_extraction(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_poll_internal_server_error(self, mock_plaintext): + """The plain text service is down.""" + mock_plaintext.extraction_is_complete.side_effect = \ + raise_http_exception(exceptions.RequestFailed, 500) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The process is explicitly failed. + self.process.poll_extraction(None, trigger, events.append) + + +class TestRetrievePlainTextContentExtraction(TestCase): + """Test :func:`PlainTextExtraction.start_extraction`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = ConfirmPreview(creator=self.creator) + self.process = PlainTextExtraction(self.submission.submission_id) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_retrieve_content(self, mock_plaintext): + """Check the status of the extraction.""" + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + res = self.process.retrieve_content(None, trigger, events.append) + + self.assertIsNotNone(res, 'Raw content is returned') + self.assertEqual(mock_plaintext.retrieve_content.call_args[0][0], + self.submission.source_content.identifier, + 'Request is made with source ID.') + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def 
test_retrieve_content_bad_response(self, mock_plaintext): + """The plain text service responds oddly which we hope is transient.""" + mock_plaintext.retrieve_content.side_effect = \ + raise_http_exception(exceptions.BadResponse, 200) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.retrieve_content(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_retrieve_content_connection_failed(self, mock_plaintext): + """Cannot conntect to plain text service.""" + mock_plaintext.retrieve_content.side_effect = \ + raise_http_exception(exceptions.ConnectionFailed, -1) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.retrieve_content(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_retrieve_content_bad_request(self, mock_plaintext): + """The request to the plain text service is malformed.""" + mock_plaintext.retrieve_content.side_effect = \ + raise_http_exception(exceptions.BadRequest, 400) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Failed): + # The process is explicitly failed. 
+ self.process.retrieve_content(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.PlainTextService') + def test_retrieve_content_internal_server_error(self, mock_plaintext): + """The plain text service is down.""" + mock_plaintext.retrieve_content.side_effect = \ + raise_http_exception(exceptions.RequestFailed, 500) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The process is explicitly failed. + self.process.retrieve_content(None, trigger, events.append) + + +class TestCallClassifier(TestCase): + """Test :func:`RunAutoclassifier.call_classifier`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = ConfirmPreview(creator=self.creator) + self.process = RunAutoclassifier(self.submission.submission_id) + + @mock.patch(f'{c_and_c.__name__}.Classifier') + def test_call_classifier(self, mock_classifier): + """Request classifier results.""" + content = mock.MagicMock() + mock_classifier.classify.return_value = ( + [classifier.classifier.Suggestion('astro-ph.HE', 0.9)], + [classifier.classifier.Flag('%stop', '0.001'), + classifier.classifier.Flag('linenos', '1')], + classifier.classifier.Counts(32345, 43, 1, 1000) + ) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + res = self.process.call_classifier(content, trigger, events.append) + + self.assertIsNone(res, 'No return') + 
self.assertEqual(mock_classifier.classify.call_args[0][0], + content, 'Request is made with content.') + + event_types = [type(e) for e in events] + self.assertIn(AddClassifierResults, event_types, + "Classifier results are added to the submission") + self.assertIn(AddContentFlag, event_types, + "Flags are added to the submission") + self.assertIn(AddFeature, event_types, + "Features are added to the submission") + + @mock.patch(f'{c_and_c.__name__}.Classifier') + def test_call_classifier_bad_response(self, mock_classifier): + """The classifier responds oddly which we hope is transient.""" + mock_classifier.classify.side_effect = \ + raise_http_exception(exceptions.BadResponse, 200) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. + self.process.call_classifier(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.Classifier') + def test_call_classifier_connection_failed(self, mock_classifier): + """Cannot conntect to classifier service.""" + mock_classifier.classify.side_effect = \ + raise_http_exception(exceptions.ConnectionFailed, -1) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The exception is re-raised as a Recoverable error. 
+ self.process.call_classifier(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.Classifier') + def test_call_classifier_bad_request(self, mock_classifier): + """The request to the classifier service is malformed.""" + mock_classifier.classify.side_effect = \ + raise_http_exception(exceptions.BadRequest, 400) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Failed): + # The process is explicitly failed. + self.process.call_classifier(None, trigger, events.append) + + @mock.patch(f'{c_and_c.__name__}.Classifier') + def test_call_classifier_internal_server_error(self, mock_classifier): + """The classifier service is down.""" + mock_classifier.classify.side_effect = \ + raise_http_exception(exceptions.RequestFailed, 500) + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission) + events = [] + with self.app.app_context(): + with self.assertRaises(Recoverable): + # The process is explicitly failed. 
+ self.process.call_classifier(None, trigger, events.append) + + +class TestCheckStopwordCount(TestCase): + """Test :func:`CheckStopwordCount.check_stop_count`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = AddFeature(creator=self.creator) + self.process = CheckStopwordCount(self.submission.submission_id) + + def test_check_low_stop_count(self): + """Submisison has a stopword count feature with a low value.""" + self.submission.annotations['abcd1234'] = Feature( + event_id='abcd1234', + created=datetime.now(UTC), + creator=self.creator, + feature_type=Feature.Type.STOPWORD_COUNT, + feature_value=5 + ) + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission, + params={'LOW_STOP': 6}) + self.process.check_stop_count(None, trigger, events.append) + + self.assertIsInstance(events[0], AddContentFlag, + 'Generates a flag; the stop count is too low') + + def test_check_ok_stop_count(self): + """Submisison has a stopword count feature with a low value.""" + self.submission.annotations['abcd1234'] = Feature( + event_id='abcd1234', + created=datetime.now(UTC), + creator=self.creator, + feature_type=Feature.Type.STOPWORD_COUNT, + feature_value=7 + ) + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission, + params={'LOW_STOP': 6}) + self.process.check_stop_count(None, trigger, events.append) + self.assertEqual(len(events), 0, 'Generates no flags') + + def test_no_stop_count_feature(self): + """Submisison has no stopword 
count features.""" + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission, + params={'LOW_STOP': 6}) + with self.assertRaises(Failed): + self.process.check_stop_count(None, trigger, events.append) + + +class TestCheckStopwordPercent(TestCase): + """Test :func:`CheckStopwordPercent.check_stop_percent`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.event = AddFeature(creator=self.creator) + self.process = CheckStopwordPercent(self.submission.submission_id) + + def test_check_low_stop_percent(self): + """Submisison has a stopword percent feature with a low value.""" + self.submission.annotations['abcd1234'] = Feature( + event_id='abcd1234', + created=datetime.now(UTC), + creator=self.creator, + feature_type=Feature.Type.STOPWORD_PERCENT, + feature_value=0.001 + ) + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission, + params={'LOW_STOP_PERCENT': 0.005}) + self.process.check_stop_percent(None, trigger, events.append) + + self.assertIsInstance(events[0], AddContentFlag, + 'Generates a flag; the stop count is too low') + + def test_check_ok_stop_percent(self): + """Submisison has a stopword percent feature with a low value.""" + self.submission.annotations['abcd1234'] = Feature( + event_id='abcd1234', + created=datetime.now(UTC), + creator=self.creator, + feature_type=Feature.Type.STOPWORD_PERCENT, + feature_value=0.01 + ) + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, 
after=self.submission, + params={'LOW_STOP_PERCENT': 0.005}) + self.process.check_stop_percent(None, trigger, events.append) + self.assertEqual(len(events), 0, 'Generates no flags') + + def test_no_stop_percent_feature(self): + """Submisison has no stopword percent features.""" + events = [] + trigger = Trigger(event=self.event, actor=self.creator, + before=self.submission, after=self.submission, + params={'LOW_STOP_PERCENT': 0.005}) + with self.assertRaises(Failed): + self.process.check_stop_percent(None, trigger, events.append) diff --git a/agent/agent/process/tests/test_email_notifications.py b/agent/agent/process/tests/test_email_notifications.py new file mode 100644 index 0000000..ba07cd4 --- /dev/null +++ b/agent/agent/process/tests/test_email_notifications.py @@ -0,0 +1,131 @@ +"""Test sending email notifications.""" + +from unittest import TestCase, mock +from datetime import datetime +from pytz import UTC, timezone + +from flask import Flask + +from arxiv import mail +from arxiv.submission.domain.event import FinalizeSubmission +from arxiv.submission.domain.agent import User, System +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + SubmissionMetadata, Classification + +from .. import SendConfirmationEmail +from .. import email_notifications +from .. 
import Failed +from ...domain import Trigger +from ...runner import ProcessRunner +from ...factory import create_app + +sys = System(__name__) +eastern = timezone('US/Eastern') + + +class TestSendConfirmationEmail(TestCase): + """Test the :class:`.SendConfirmationEmail` process.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com', + forename='Ross', surname='Perot') + self.submission_id = 12345 + self.before = Submission( + submission_id=self.submission_id, + metadata=mock.MagicMock( + title="The best title", + authors_display="Frank Underwood (POTUS)", + abstract="Pork loin meatloaf meatball in cow et. Tail pork ut velit, eu prosciutto pork chop pariatur ad non hamburger bacon cupidatat. Short loin nulla aute esse spare ribs eiusmod consequat anim capicola chuck cupim labore alcatra strip steak tail. Lorem short ribs andouille leberkas pork belly. Andouille fatback ham hock burgdoggen, ham pork belly labore doner aute esse.", + comments="Aliqua ham capicola minim filet mignon tenderloin voluptate bacon biltong shank in chuck do pig in. Id pariatur jowl ad ham pork chop doner buffalo laboris sed ut", + msc_class="14J60 (Primary), 14F05, 14J26 (Secondary)", + acm_class="F.2.2; I.2.7", + journal_ref="Nature 2021 39202:32-12", + report_num="Report 1234", + doi="10.00123/43463" + ), + source_content=mock.MagicMock(uncompressed_size=392019), + version=1, + primary_classification=mock.MagicMock(category='cs.DL'), + license=mock.MagicMock(uri='http://some.license/v2'), + creator=self.creator, + owner=self.creator, + created=datetime(2018, 3, 4, 18, 34, 2, tzinfo=UTC), + submitted=datetime(2018, 3, 4, 19, 34, 2, tzinfo=UTC), + status=Submission.WORKING + ) + self.after = Submission( + submission_id=12345, + metadata=mock.MagicMock( + title="The best title", + authors_display="Frank Underwood (POTUS)", + abstract="Pork loin meatloaf meatball in cow et. 
Tail pork ut velit, eu prosciutto pork chop pariatur ad non hamburger bacon cupidatat. Short loin nulla aute esse spare ribs eiusmod consequat anim capicola chuck cupim labore alcatra strip steak tail. Lorem short ribs andouille leberkas pork belly. Andouille fatback ham hock burgdoggen, ham pork belly labore doner aute esse.", + comments="Aliqua ham capicola minim filet mignon tenderloin voluptate bacon biltong shank in chuck do pig in. Id pariatur jowl ad ham pork chop doner buffalo laboris sed ut", + msc_class="14J60 (Primary), 14F05, 14J26 (Secondary)", + acm_class="F.2.2; I.2.7", + journal_ref="Nature 2021 39202:32-12", + report_num="Report 1234", + doi="10.00123/43463" + ), + source_content=mock.MagicMock(uncompressed_size=392019), + version=1, + primary_classification=mock.MagicMock(category='cs.DL'), + license=mock.MagicMock(uri='http://some.license/v2'), + creator=self.creator, + owner=self.creator, + created=datetime(2018, 3, 4, 18, 34, 2, tzinfo=UTC), + submitted=datetime(2018, 3, 4, 19, 34, 2, tzinfo=UTC), + status=Submission.SUBMITTED + ) + self.event = FinalizeSubmission(creator=self.creator, + created=datetime.now(UTC)) + self.process = SendConfirmationEmail(self.submission_id) + + @mock.patch(f'{email_notifications.__name__}.mail') + def test_bad_trigger(self, mock_mail): + """The trigger lacks sufficient data to send an email.""" + trigger = Trigger() + events = [] + with self.app.app_context(): + with self.assertRaises(Failed): # The process explicitly fails. 
+ self.process.send(None, trigger, events.append) + + @mock.patch(f'{email_notifications.__name__}.mail') + def test_email_confirmation(self, mock_mail): + """Confirmation email should be sent to the submitter.""" + trigger = Trigger(event=self.event, actor=self.creator, + before=self.before, after=self.after) + events = [] + with self.app.app_context(): + self.process.send(None, trigger, events.append) + + recipient, subject, content, html = mock_mail.send.call_args[0] + self.assertIn('We have received your submission to arXiv.', content) + self.assertIn('Your article is scheduled to be announced at Mon, 5 Mar' + ' 2018 20:00:00 ET', content) + self.assertIn('Updates before Mon, 5 Mar 2018 14:00:00 ET will not' + ' delay announcement.', content) + + self.assertIn(f'From: {self.creator.name} <{self.creator.email}>', + content) + self.assertIn('Date: Sun, 4 Mar 2018 13:34:02 ET (392.019 KB)', + content) + self.assertIn(f'Title: {self.after.metadata.title}', content) + self.assertIn(f'Authors: {self.after.metadata.authors_display}', + content) + self.assertIn( + f'Categories: {self.after.primary_classification.category}', + content + ) + self.assertIn(f'MSC classes: {self.after.metadata.msc_class}', content) + self.assertIn(f'ACM classes: {self.after.metadata.acm_class}', content) + self.assertIn(f'Journal reference: {self.after.metadata.journal_ref}', + content) + self.assertIn(f'Report number: {self.after.metadata.report_num}', + content) + self.assertIn(f'License: {self.after.license.uri}', content) + + for line in content.split('\n'): + self.assertLess(len(line), 80, + "No line is longer than 79 characters") diff --git a/agent/agent/process/tests/test_metadata_checks.py b/agent/agent/process/tests/test_metadata_checks.py new file mode 100644 index 0000000..305ac64 --- /dev/null +++ b/agent/agent/process/tests/test_metadata_checks.py @@ -0,0 +1,337 @@ +"""Tests for automated metadata checks.""" + +from unittest import TestCase, mock +from datetime import datetime, 
timedelta +from pytz import UTC +import copy +from arxiv.submission.domain.event import SetTitle, SetAbstract, \ + AddMetadataFlag, RemoveFlag +from arxiv.submission.domain.agent import Agent, User +from arxiv.submission.domain.flag import Flag, MetadataFlag +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + SubmissionMetadata, Classification + +from .. import CheckForSimilarTitles, CheckTitleForUnicodeAbuse, \ + CheckAbstractForUnicodeAbuse, Failed +from .. import metadata_checks +from ...domain import Trigger +from ...factory import create_app +from .data import titles + + +class TestCheckForSimilarTitles(TestCase): + """Tests for :func:`.metadata_checks.check_similar_titles`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = CheckForSimilarTitles(self.submission.submission_id) + + @mock.patch(f'{metadata_checks.__name__}.classic.get_titles') + def test_check_similar_titles(self, mock_get_titles): + """Check for similar titles.""" + mock_get_titles.return_value = titles.TITLES + + user_id = 54321 + title = 'a lepton qed of colliders or interactions with strong field' \ + ' electron laser' + event = SetTitle(creator=self.creator, title=title) + before = copy.deepcopy(self.submission) + after = copy.deepcopy(self.submission) + after.metadata = SubmissionMetadata(title=title) + events = [] + + trigger = Trigger(event=event, actor=self.creator, + before=before, after=after, + params={'TITLE_SIMILARITY_WINDOW': 60}) + + some_titles = self.process.get_candidates(None, trigger, events.append) + + 
self.assertEqual(len(some_titles), len(titles.TITLES)) + self.assertEqual(mock_get_titles.call_count, 1) + self.assertIsInstance(mock_get_titles.call_args[0][0], datetime) + + def test_check_for_duplicates(self): + """Look for similar titles.""" + title = 'a lepton qed of colliders or interactions with strong field' \ + ' electron laser' + event = SetTitle(creator=self.creator, title=title) + + before = copy.deepcopy(self.submission) + after = copy.deepcopy(self.submission) + after.metadata = SubmissionMetadata(title=title) + + trigger = Trigger(event=event, actor=self.creator, + before=before, after=after, + params={'TITLE_SIMILARITY_THRESHOLD': 0.7}) + events = [] + self.process.check_for_duplicates(titles.TITLES, trigger, + events.append) + self.assertGreater(len(events), 0, "Generates some events") + for event in events: + self.assertIsInstance(event, AddMetadataFlag, + "Generates AddMetadataFlag events") + self.assertEqual( + event.flag_type, + MetadataFlag.Type.POSSIBLE_DUPLICATE_TITLE, + "Flag has type POSSIBLE_DUPLICATE_TITLE" + ) + + def test_check_with_existing_flags(self): + """The submission already has possible dupe title flags.""" + title = 'a lepton qed of colliders or interactions with strong field' \ + ' electron laser' + self.submission.flags['asdf1234'] = MetadataFlag( + event_id='asdf1234', + creator=self.creator, + created=datetime.now(UTC), + flag_type=MetadataFlag.Type.POSSIBLE_DUPLICATE_TITLE, + flag_data={'id': 5, 'title': title, 'owner': self.creator}, + field='title', + comment='possible duplicate title' + ) + event = SetTitle(creator=self.creator, title=title) + + before = copy.deepcopy(self.submission) + after = copy.deepcopy(self.submission) + after.metadata = SubmissionMetadata(title=title) + + trigger = Trigger(event=event, actor=self.creator, + before=before, after=after, + params={'TITLE_SIMILARITY_THRESHOLD': 1.0}) + events = [] + self.process.check_for_duplicates(titles.TITLES, trigger, + events.append) + 
self.assertGreater(len(events), 0, "Generates some events") + self.assertIsInstance(events[0], RemoveFlag) + self.assertEqual(events[0].flag_id, 'asdf1234') + + def test_check_for_duplicates_with_strict_threshold(self): + """Look for similar titles with an impossibly strict threshold.""" + title = 'a lepton qed of colliders or interactions with strong field' \ + ' electron laser' + event = SetTitle(creator=self.creator, title=title) + before = copy.deepcopy(self.submission) + after = copy.deepcopy(self.submission) + after.metadata = SubmissionMetadata(title=title) + + trigger = Trigger(event=event, actor=self.creator, + before=before, after=after, + params={'TITLE_SIMILARITY_THRESHOLD': 1.0}) + events = [] + self.process.check_for_duplicates(titles.TITLES, trigger, + events.append) + self.assertEqual(len(events), 0) + + +class TestCheckTitleForUnicodeAbuse(TestCase): + """Tests for :func:`.CheckTitleForUnicodeAbuse.check_title`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = CheckTitleForUnicodeAbuse(self.submission.submission_id) + + def test_low_ascii(self): + """Title has very few ASCII characters.""" + before = copy.deepcopy(self.submission) + title = 'ⓕöö tïtłę' + self.submission.metadata = SubmissionMetadata(title=title) + event = SetTitle(creator=self.creator, title=title) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.5}) + + events = [] + self.process.check_title(None, trigger, events.append) + self.assertIsInstance(events[0], AddMetadataFlag, 
'Adds metadata flag') + self.assertEqual(events[0].flag_type, MetadataFlag.Type.CHARACTER_SET) + self.assertEqual(events[0].field, 'title') + self.assertEqual(events[0].flag_data['ascii'], 3/9) + + def test_plenty_of_ascii(self): + """Title has plenty of ASCII characters.""" + before = copy.deepcopy(self.submission) + title = 'A boring title with occâsional non-ASCII characters' + self.submission.metadata = SubmissionMetadata(title=title) + event = SetTitle(creator=self.creator, title=title) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + + events = [] + self.process.check_title(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No flags generated') + + def test_no_metadata(self): + """The submission has no metadata.""" + self.submission.metadata = None + trigger = Trigger(actor=self.creator, + before=self.submission, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + events = [] + with self.assertRaises(Failed): + self.process.check_title(None, trigger, events.append) + + def test_no_abstract(self): + """The submission has no title.""" + self.submission.metadata = SubmissionMetadata(title=None) + trigger = Trigger(actor=self.creator, + before=self.submission, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + events = [] + with self.assertRaises(Failed): + self.process.check_title(None, trigger, events.append) + + def test_clear_previous_tags(self): + """There were some previous flags.""" + self.submission.flags['asdf1234'] = MetadataFlag( + event_id='asdf1234', + creator=self.creator, + created=datetime.now(UTC), + flag_type=MetadataFlag.Type.CHARACTER_SET, + flag_data={'ascii': 0}, + field='title', + comment='something fishy' + ) + before = copy.deepcopy(self.submission) + title = 'A boring title with occâsional non-ASCII characters' + self.submission.metadata = SubmissionMetadata(title=title) + event = 
SetTitle(creator=self.creator, title=title) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + + events = [] + self.process.check_title(None, trigger, events.append) + self.assertGreater(len(events), 0, "Generates some events") + self.assertIsInstance(events[0], RemoveFlag) + self.assertEqual(events[0].flag_id, 'asdf1234') + + +class TestCheckAbstractForUnicodeAbuse(TestCase): + """Tests for :func:`.CheckAbstractForUnicodeAbuse.check_abstract`.""" + + def setUp(self): + """We have a submission.""" + self.app = create_app() + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = \ + CheckAbstractForUnicodeAbuse(self.submission.submission_id) + + def test_low_ascii(self): + """Abstract has very few ASCII characters.""" + before = copy.deepcopy(self.submission) + abstract = 'ⓥéⓇÿ âⒷśⓣⓇāčⓣ' + self.submission.metadata = SubmissionMetadata(abstract=abstract) + event = SetAbstract(creator=self.creator, abstract=abstract) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.5}) + + events = [] + self.process.check_abstract(None, trigger, events.append) + self.assertIsInstance(events[0], AddMetadataFlag, 'Adds metadata flag') + self.assertEqual(events[0].flag_type, MetadataFlag.Type.CHARACTER_SET) + self.assertEqual(events[0].field, 'abstract') + self.assertEqual(events[0].flag_data['ascii'], 1/13) + + def test_plenty_of_ascii(self): + """Abstract has plenty of ASCII characters.""" + before = copy.deepcopy(self.submission) + abstract = 'what a boring abstract with 
no unicode characters' + self.submission.metadata = SubmissionMetadata(abstract=abstract) + event = SetAbstract(creator=self.creator, abstract=abstract) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + + events = [] + self.process.check_abstract(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No flags generated') + + def test_no_metadata(self): + """The submission has no metadata.""" + self.submission.metadata = None + trigger = Trigger(actor=self.creator, + before=self.submission, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + events = [] + with self.assertRaises(Failed): + self.process.check_abstract(None, trigger, events.append) + + def test_no_abstract(self): + """The submission has no abstract.""" + self.submission.metadata = SubmissionMetadata(abstract=None) + trigger = Trigger(actor=self.creator, + before=self.submission, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + events = [] + with self.assertRaises(Failed): + self.process.check_abstract(None, trigger, events.append) + + def test_clear_previous_tags(self): + """There were some previous flags.""" + self.submission.flags['asdf1234'] = MetadataFlag( + event_id='asdf1234', + creator=self.creator, + created=datetime.now(UTC), + flag_type=MetadataFlag.Type.CHARACTER_SET, + flag_data={'ascii': 0}, + field='abstract', + comment='something fishy' + ) + before = copy.deepcopy(self.submission) + abstract = 'what a boring abstract with no unicode characters' + self.submission.metadata = SubmissionMetadata(abstract=abstract) + event = SetAbstract(creator=self.creator, abstract=abstract) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params={'METADATA_ASCII_THRESHOLD': 0.1}) + + events = [] + self.process.check_abstract(None, trigger, events.append) + self.assertGreater(len(events), 0, "Generates some events") + 
self.assertIsInstance(events[0], RemoveFlag) + self.assertEqual(events[0].flag_id, 'asdf1234') diff --git a/agent/agent/process/tests/test_reclassification.py b/agent/agent/process/tests/test_reclassification.py new file mode 100644 index 0000000..bad9687 --- /dev/null +++ b/agent/agent/process/tests/test_reclassification.py @@ -0,0 +1,270 @@ +"""Tests for reclassification policies.""" + +from unittest import TestCase, mock +from datetime import datetime +from pytz import UTC +import copy + +from arxiv.submission.domain.event import AddClassifierResults, \ + SetPrimaryClassification, AddSecondaryClassification, AddProposal, \ + FinalizeSubmission, AcceptProposal +from arxiv.submission.domain.agent import User, System +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + Classification, License, SubmissionMetadata +from arxiv.submission.domain.proposal import Proposal +from arxiv.submission.domain.annotation import ClassifierResults + +from arxiv.submission.services.classifier.classifier import Classifier + +from ..import reclassification, ProposeReclassification, \ + ProposeCrossListFromPrimaryCategory, AcceptSystemCrossListProposals +from ...domain import Trigger +from ...factory import create_app +from ... import config + +prob = Classifier.probability +sys = System(__name__) + + +class TestProposeReclassification(TestCase): + """We use classifier results to propose reclassification.""" + + # These test cases are ported from + # arxiv-lib/t/arxiv_classifier/check_scores.t. 
+ CASES = [ + {'primary_category': 'cs.AI', + 'results': [{"category": 'cs.DL', 'probability': prob(1.01)}, + {"category": 'math.GM', 'probability': prob(1.90)}, + {"category": 'cs.AI', 'probability': prob(-0.03)}], + 'expected_category': 'cs.DL', + 'expected_reason': 'selected primary cs.AI has probability 0.493'}, + {'primary_category': 'cs.AI', + 'results': [{"category": 'physics.gen-ph', 'probability': prob(1.01)}, + {"category": 'math.GM', 'probability': prob(1.02)}, + {"category": 'cs.AI', 'probability': prob(-0.05)}], + 'expected_category': 'math.GM', + 'expected_reason': 'selected primary cs.AI has probability 0.488'}, + {'primary_category': 'cs.AI', + 'results': [{"category": 'cs.AI', 'probability': prob(1.05)}, + {"category": 'math.GM', 'probability': prob(1.02)}, + {"category": 'cs.DL', 'probability': prob(0.05)}], + 'expected_category': None, + 'expected_reason': None}, + {'primary_category': 'cs.CE', + 'results': [{"category": 'cs.DL', 'probability': prob(1.01)}, + {"category": 'math.GM', 'probability': prob(1.90)}, + {"category": 'cs.CE', 'probability': prob(-0.04)}], + 'expected_category': None, + 'expected_reason': None}, + {'primary_category': 'eess.IV', + 'results': [{"category": 'cs.DL', 'probability': prob(1.01)}, + {"category": 'math.GM', 'probability': prob(1.90)}, + {"category": 'eess.IV', 'probability': prob(-0.04)}], + 'expected_category': 'math.GM', + 'expected_reason': 'selected primary eess.IV has probability 0.49'}, + {'primary_category': 'eess.SP', + 'results': [{"category": 'cs.DL', 'probability': prob(1.01)}, + {"category": 'math.GM', 'probability': prob(1.90)}], + 'expected_category': 'math.GM', + 'expected_reason': 'selected primary eess.SP not found in classifier' + ' scores'} + ] + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + 
source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = ProposeReclassification(self.submission.submission_id) + + def test_suggestions(self): + """Test suggestions using :const:`.CASES`.""" + before = copy.deepcopy(self.submission) + for case in self.CASES: + self.submission.primary_classification \ + = Classification(category=case['primary_category']) + self.submission.annotations = { + 'asdf1234': ClassifierResults( + event_id='asdf1234', + creator=self.creator, + created=datetime.now(UTC), + results=case['results'] + ) + } + events = [] + params = { + 'NO_RECLASSIFY_ARCHIVES': config.NO_RECLASSIFY_ARCHIVES, + 'NO_RECLASSIFY_CATEGORIES': config.NO_RECLASSIFY_CATEGORIES, + 'RECLASSIFY_PROPOSAL_THRESHOLD': + config.RECLASSIFY_PROPOSAL_THRESHOLD + } + event = AddClassifierResults(creator=self.creator, + created=datetime.now(UTC), + results=case['results']) + trigger = Trigger(event=event, actor=self.creator, + before=before, after=self.submission, + params=params) + self.process.propose_primary(None, trigger, events.append) + + if case['expected_category'] is None: + self.assertEqual(len(events), 0, "No proposals are made") + else: + self.assertIsInstance(events[0], AddProposal, + "Generates AddProposal") + self.assertEqual(events[0].proposed_event_type, + SetPrimaryClassification, + "Proposes reclassification") + self.assertEqual(events[0].proposed_event_data["category"], + case['expected_category']) + self.assertEqual(events[0].comment, case['expected_reason']) + + +class TestProposeCrossFromPrimary(TestCase): + """In some cases, we propose a cross-list category based on primary.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + 
created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.submission.submitter_contact_verified = True + self.submission.submitter_accepts_policy = True + self.submission.license = License(name='foo', uri='http://foo.foo') + self.submission.metadata = SubmissionMetadata( + title='foo', + abstract='oof', + authors_display='Bloggs, J' + ) + self.process = \ + ProposeCrossListFromPrimaryCategory(self.submission.submission_id) + + def test_propose_cross(self): + """Propose a cross-list category based on primary.""" + self.submission.primary_classification = Classification('cs.AI') + event = FinalizeSubmission(creator=self.creator, + created=datetime.now(UTC)) + events = [] + params = {'AUTO_CROSS_FOR_PRIMARY': {'cs.AI': 'math.GM'}} + trigger = Trigger(event=event, actor=self.creator, + before=self.submission, + after=self.submission, params=params) + self.process.propose(None, trigger, events.append) + + self.assertIsInstance(events[0], AddProposal, + 'Adds a proposal') + self.assertEqual(events[0].proposed_event_type, + AddSecondaryClassification, + 'Proposes to add a secondary') + self.assertEqual(events[0].proposed_event_data['category'], 'math.GM', + 'Proposes cross-list category') + self.assertEqual(events[0].comment, 'cs.AI is primary') + + def test_no_rule_exists(self): + """There is no cross-list rule for the primary category.""" + self.submission.primary_classification = Classification('cs.DL') + event = FinalizeSubmission(creator=self.creator, + created=datetime.now(UTC)) + events = [] + params = {'AUTO_CROSS_FOR_PRIMARY': {'cs.AI': 'math.GM'}} + trigger = Trigger(event=event, actor=self.creator, + before=self.submission, after=self.submission, + params=params) + self.process.propose(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No proposals are made') + + def 
test_cross_already_set(self): + """The cross-list category is already present.""" + self.submission.primary_classification = Classification('cs.AI') + self.submission.secondary_classification = [Classification('math.GM')] + event = FinalizeSubmission(creator=self.creator, + created=datetime.now(UTC)) + events = [] + params = {'AUTO_CROSS_FOR_PRIMARY': {'cs.AI': 'math.GM'}} + trigger = Trigger(event=event, actor=self.creator, + before=self.submission, after=self.submission, + params=params) + self.process.propose(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No proposals are made') + + +class TestAcceptSystemCrossProposal(TestCase): + """We accept cross-list proposals that we generate ourselves.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = \ + AcceptSystemCrossListProposals(self.submission.submission_id) + + def test_system_cross_proposal(self): + """A cross-list proposal is generated by the system.""" + event = AddProposal(creator=sys, created=datetime.now(UTC), + proposed_event_type=AddSecondaryClassification, + proposed_event_data={'category': 'cs.DL'}) + self.submission.proposals[event.event_id] = Proposal( + event_id=event.event_id, + creator=sys, + proposed_event_type=AddSecondaryClassification, + proposed_event_data={'category': 'cs.DL'} + ) + events = [] + trigger = Trigger(event=event, actor=sys, before=self.submission, + after=self.submission, params={}) + self.process.accept(None, trigger, events.append) + self.assertIsInstance(events[0], AcceptProposal, + 'The proposal is accepted') + self.assertEqual(events[0].proposal_id, 
event.event_id, + 'Proposal is identified by the event that created it') + + def test_user_cross_proposal(self): + """A cross-list proposal is generated by a user.""" + event = AddProposal(creator=self.creator, created=datetime.now(UTC), + proposed_event_type=AddSecondaryClassification, + proposed_event_data={'category': 'cs.DL'}) + self.submission.proposals[event.event_id] = Proposal( + event_id=event.event_id, + creator=self.creator, + proposed_event_type=AddSecondaryClassification, + proposed_event_data={'category': 'cs.DL'} + ) + events = [] + trigger = Trigger(event=event, actor=self.creator, + before=self.submission, + after=self.submission, params={}) + self.process.accept(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No proposal is generated') diff --git a/agent/agent/process/tests/test_size_limits.py b/agent/agent/process/tests/test_size_limits.py new file mode 100644 index 0000000..2faa017 --- /dev/null +++ b/agent/agent/process/tests/test_size_limits.py @@ -0,0 +1,305 @@ +"""Tests for content size checks.""" + +from unittest import TestCase, mock +from datetime import datetime, timedelta +from pytz import UTC +import copy + +from arxiv.integration.api import status, exceptions +from arxiv.submission.domain.event import SetTitle, SetAbstract, \ + AddMetadataFlag, RemoveFlag, AddHold, RemoveHold +from arxiv.submission.domain.agent import Agent, User +from arxiv.submission.domain.flag import Flag, MetadataFlag +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + SubmissionMetadata, Classification, Compilation, Hold + +from .. import CheckPDFSize, CheckSubmissionSourceSize, Failed, Recoverable +from .. 
import size_limits +from ...domain import Trigger +from ...factory import create_app +from .data import titles +from .util import raise_http_exception + + +class TestCheckSubmissionSourceSize(TestCase): + """Test :func:`.CheckSubmissionSourceSize.check`.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC) + ) + self.process = CheckSubmissionSourceSize(self.submission.submission_id) + + def test_no_source(self): + """Submission has no source.""" + trigger = Trigger(before=self.submission, after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + with self.assertRaises(Failed): + self.process.check(None, trigger, events.append) + + def test_small_source(self): + """The submission source content is quite small.""" + self.submission.source_content = SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=593, + compressed_size=53 + ) + trigger = Trigger(before=self.submission, after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + self.process.check(None, trigger, events.append) + self.assertEqual(len(events), 0, 'No events generated') + + def test_small_source_previous_hold(self): + """The submission has a hold, but this source content is OK.""" + self.submission.source_content = SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=593, + compressed_size=53 + ) + self.submission.holds['asdf1234'] = Hold( + event_id='asdf1234', + creator=self.creator, + hold_type=Hold.Type.SOURCE_OVERSIZE + ) + trigger = Trigger(before=self.submission, 
after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + self.process.check(None, trigger, events.append) + self.assertIsInstance(events[0], RemoveHold, 'Removes a hold') + self.assertEqual(events[0].hold_event_id, 'asdf1234') + self.assertEqual(events[0].hold_type, Hold.Type.SOURCE_OVERSIZE) + + def test_huge_uncompressed_size(self): + """The submission source is huge uncompressed.""" + self.submission.source_content = SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=593_032_039, + compressed_size=53 + ) + trigger = Trigger(before=self.submission, after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + self.process.check(None, trigger, events.append) + self.assertIsInstance(events[0], AddHold, 'Adds a hold') + self.assertEqual(events[0].hold_type, Hold.Type.SOURCE_OVERSIZE) + + def test_huge_previous_holds(self): + """The submission has a hold, and this source content is too big.""" + self.submission.source_content = SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=593_032_039, + compressed_size=593_032_039 + ) + self.submission.holds['asdf1234'] = Hold( + event_id='asdf1234', + creator=self.creator, + hold_type=Hold.Type.SOURCE_OVERSIZE + ) + trigger = Trigger(before=self.submission, after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + self.process.check(None, trigger, events.append) + self.assertEqual(len(events), 0, 'Generates no holds') + + def test_huge_compressed_size(self): + """The submission source is huge compressed.""" + self.submission.source_content = SubmissionContent( + identifier='5678', + 
source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=493, + compressed_size=593_032_039 # Something is very wrong... + ) + trigger = Trigger(before=self.submission, after=self.submission, + params={'UNCOMPRESSED_PACKAGE_MAX_BYTES': 40_003_932, + 'COMPRESSED_PACKAGE_MAX_BYTES': 3_039_303}) + events = [] + self.process.check(None, trigger, events.append) + self.assertIsInstance(events[0], AddHold, 'Adds a hold') + self.assertEqual(events[0].hold_type, Hold.Type.SOURCE_OVERSIZE) + + +class TestPDFGetSize(TestCase): + """Test :func:`.CheckPDFSize.get_size`.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = CheckPDFSize(self.submission.submission_id) + + def test_get_size_no_source(self): + """The submission has no source content.""" + self.submission.source_content = None + events = [] + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator) + with self.assertRaises(Failed): + self.process.get_size(None, trigger, events.append) + + @mock.patch(f'{size_limits.__name__}.get_system_token', + mock.MagicMock(return_value='footoken')) + @mock.patch(f'{size_limits.__name__}.compiler.Compiler.get_status') + def test_get_size_server_error(self, mock_get_status): + """The compiler service flakes out.""" + mock_get_status.side_effect = \ + raise_http_exception(exceptions.RequestFailed, 500) + events = [] + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator) + with self.assertRaises(Recoverable): + self.process.get_size(None, trigger, events.append) + + 
@mock.patch(f'{size_limits.__name__}.get_system_token', + mock.MagicMock(return_value='footoken')) + @mock.patch(f'{size_limits.__name__}.compiler.Compiler.get_status') + def test_get_size_compilation_in_progress(self, mock_get_status): + """The submission has a compilation but it is not finished.""" + mock_get_status.return_value = mock.MagicMock( + status=Compilation.Status.IN_PROGRESS + ) + + events = [] + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator) + with self.assertRaises(Recoverable): + self.process.get_size(None, trigger, events.append) + + @mock.patch(f'{size_limits.__name__}.get_system_token', + mock.MagicMock(return_value='footoken')) + @mock.patch(f'{size_limits.__name__}.compiler.Compiler.get_status') + def test_get_size_compilation_failed(self, mock_get_status): + """The submission has a compilation but it failed.""" + mock_get_status.return_value = mock.MagicMock( + status=Compilation.Status.FAILED + ) + + events = [] + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator) + with self.assertRaises(Failed): + self.process.get_size(None, trigger, events.append) + + @mock.patch(f'{size_limits.__name__}.get_system_token', + mock.MagicMock(return_value='footoken')) + @mock.patch(f'{size_limits.__name__}.compiler.Compiler.get_status') + def test_get_size(self, mock_get_status): + """The submission has a compilation.""" + size_bytes = 50_030_299_399 + mock_get_status.return_value = mock.MagicMock( + status=Compilation.Status.SUCCEEDED, + size_bytes=size_bytes + ) + + events = [] + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator) + + self.assertEqual(self.process.get_size(None, trigger, events.append), + size_bytes, 'Gets the compilation size in bytes') + + +class TestEvaluatePDFSize(TestCase): + """Test :func:`.CheckPDFSize.evaluate_size`.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, 
email='something@else.com') + self.submission = Submission( + submission_id=2347441, + creator=self.creator, + owner=self.creator, + created=datetime.now(UTC), + source_content=SubmissionContent( + identifier='5678', + source_format=SubmissionContent.Format('pdf'), + checksum='a1b2c3d4', + uncompressed_size=58493, + compressed_size=58493 + ) + ) + self.process = CheckPDFSize(self.submission.submission_id) + + def test_huge_pdf(self): + """The PDF is huge.""" + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator, params={'PDF_LIMIT_BYTES': 5_000_000}) + size_bytes = 50_030_299_399 + events = [] + self.process.evaluate_size(size_bytes, trigger, events.append) + self.assertIsInstance(events[0], AddHold, 'Adds a hold') + self.assertEqual(events[0].hold_type, Hold.Type.PDF_OVERSIZE, + 'Adds a PDF oversize hold') + + def test_small_pdf(self): + """The PDF is quite small.""" + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator, params={'PDF_LIMIT_BYTES': 5_000_000}) + size_bytes = 549 + events = [] + self.process.evaluate_size(size_bytes, trigger, events.append) + self.assertEqual(len(events), 0, 'No holds are generated') + + def test_existing_hold(self): + """The submission already has an oversize hold, and this PDF is OK.""" + self.submission.holds['asdf1234'] = Hold( + event_id='asdf1234', + creator=self.creator, + hold_type=Hold.Type.PDF_OVERSIZE + ) + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator, params={'PDF_LIMIT_BYTES': 5_000_000}) + size_bytes = 549 + events = [] + self.process.evaluate_size(size_bytes, trigger, events.append) + self.assertIsInstance(events[0], RemoveHold, 'Removes a hold') + self.assertEqual(events[0].hold_type, Hold.Type.PDF_OVERSIZE, + 'Removes a PDF oversize hold') + self.assertEqual(events[0].hold_event_id, 'asdf1234', + 'Removes the existing PDF oversize hold') + + def test_existing_hold_still_huge(self): + """The submission already 
has a hold, and this PDF is still huge.""" + self.submission.holds['asdf1234'] = Hold( + event_id='asdf1234', + creator=self.creator, + hold_type=Hold.Type.PDF_OVERSIZE + ) + trigger = Trigger(before=self.submission, after=self.submission, + actor=self.creator, params={'PDF_LIMIT_BYTES': 5_000_000}) + size_bytes = 50_030_299_399 + events = [] + self.process.evaluate_size(size_bytes, trigger, events.append) + self.assertEqual(len(events), 0, 'No events are generated') diff --git a/agent/agent/process/tests/util.py b/agent/agent/process/tests/util.py new file mode 100644 index 0000000..da6bb11 --- /dev/null +++ b/agent/agent/process/tests/util.py @@ -0,0 +1,7 @@ +from unittest import mock + + +def raise_http_exception(exc, code: int, msg='argle bargle'): + def side_effect(*args, **kwargs): + raise exc(msg, mock.MagicMock(status_code=code)) + return side_effect diff --git a/agent/agent/process/util.py b/agent/agent/process/util.py new file mode 100644 index 0000000..4627dd5 --- /dev/null +++ b/agent/agent/process/util.py @@ -0,0 +1,27 @@ +"""Helper functions.""" + +from functools import lru_cache as memoize + +from arxiv.base.globals import get_application_config + + +@memoize(maxsize=1028) +def is_ascii(string: str) -> bool: + """Determine whether or not a string is ASCII.""" + try: + bytes(string, encoding='ascii') + return True + except UnicodeEncodeError: + return False + + +def below_ascii_threshold(proportion: float) -> bool: + """Whether or not the proportion of ASCII characters is too low.""" + threshold = get_application_config().get('TITLE_ASCII_THRESHOLD', 0.5) + return proportion < threshold + + +@memoize(maxsize=1028) +def proportion_ascii(phrase: str) -> float: + """Calculate the proportion of a string comprised of ASCII characters.""" + return len([c for c in phrase if is_ascii(c)])/len(phrase) diff --git a/agent/agent/rules/__init__.py b/agent/agent/rules/__init__.py new file mode 100644 index 0000000..dd24759 --- /dev/null +++ 
b/agent/agent/rules/__init__.py @@ -0,0 +1,115 @@ +""" +Submission event rules. + +A **rule** defines the circumstances under which a process should be carried +out. Specifically, a rule is associated with a particular type of event, and a +function that determines whether the process should be carried out based on the +event properties and/or the state of the submission. + +Rules are implemented by instantiating :class:`.Rule` in this module. +""" + +from typing import Dict, Any, Iterable, Tuple +from arxiv.base import logging +from arxiv.submission import Event, Submission +from arxiv.submission.domain.event import \ + SetTitle, \ + SetAbstract, \ + SetUploadPackage, \ + UpdateUploadPackage, \ + ConfirmPreview, \ + FinalizeSubmission, \ + AddFeature, \ + AddClassifierResults, \ + AddProposal + +from arxiv.submission.domain.annotation import Feature + +from ..domain import Trigger +from .. import process +from .base import Rule, REGISTRY, ParamFunc +from .conditions import is_user_event, is_system_event, is_feature_type, \ + is_always +from .params import empty_params, make_params + +logger = logging.getLogger(__name__) +Params = Dict[str, Any] + + +def evaluate(event: Event, before: Submission, after: Submission) \ + -> Iterable[Tuple[process.Process, Params]]: + """ + Evaluate an event against known rules. + + Parameters + ---------- + event : :class:`.domain.Event` + The event to evaluate. + before : :class:`.domain.submission.Submission` + The state of the submission prior to the event. + after : :class:`.domain.submission.Submission` + The state of the submission after the event. + + Returns + ------- + iterable + Each item is a two-tuple, composed of a triggered :class:`.Process` + instance and the configuration parameters with which it should be run. 
+ + """ + logger.debug('evaluate event %s (%s)', event.event_id, type(event)) + for rule in REGISTRY[type(event)]: + if rule.condition(event, before, after): + logger.debug('event %s matches rule %s', event.event_id, rule.name) + params = rule.params(event, before, after) + process = rule.process(event.submission_id) + yield process, params + + +title_params = make_params('TITLE_SIMILARITY_WINDOW', + 'TITLE_SIMILARITY_THRESHOLD') +reclass_params = make_params('NO_RECLASSIFY_CATEGORIES', + 'NO_RECLASSIFY_ARCHIVES', + 'RECLASSIFY_PROPOSAL_THRESHOLD', + 'AUTO_CROSS_FOR_PRIMARY') +size_params = make_params('UNCOMPRESSED_PACKAGE_MAX_BYTES', + 'COMPRESSED_PACKAGE_MAX_BYTES') + + +Rule(ConfirmPreview, is_user_event, empty_params, process.RunAutoclassifier, + "Run the autoclassifier when the preview is confirmed by the submitter") +Rule(AddFeature, is_feature_type(Feature.Type.STOPWORD_PERCENT), + make_params('LOW_STOP_PERCENT'), process.CheckStopwordPercent, + "Add a flag if the percentage of stopwords is below a threshold value") +Rule(AddFeature, is_feature_type(Feature.Type.STOPWORD_COUNT), + make_params('LOW_STOP'), process.CheckStopwordCount, + "Add a flag if the number of stopwords is below a threshold value") +Rule(FinalizeSubmission, is_always, empty_params, + process.SendConfirmationEmail, + "Send a confirmation e-mail when a submission is finalized") +Rule(SetTitle, is_user_event, title_params, process.CheckForSimilarTitles, + "Check for other submissions with similar titles, and add a flag") +Rule(SetTitle, is_user_event, make_params('METADATA_ASCII_THRESHOLD'), + process.CheckTitleForUnicodeAbuse, + "Check the title for excessive non-ASCII characters, and add a flag") +Rule(SetAbstract, is_user_event, make_params('METADATA_ASCII_THRESHOLD'), + process.CheckAbstractForUnicodeAbuse, + "Check the title for excessive non-ASCII characters, and add a flag") +Rule(AddClassifierResults, is_always, reclass_params, + process.ProposeReclassification, + "Evaluate 
classifier results and propose new classifications") +Rule(FinalizeSubmission, is_always, reclass_params, + process.ProposeCrossListFromPrimaryCategory, + "Propose cross-list categories based on user selected primary category") +Rule(AddProposal, is_system_event, empty_params, + process.AcceptSystemCrossListProposals, + "Accept our own proposals for adding cross-list categories") +Rule(SetUploadPackage, is_always, size_params, + process.CheckSubmissionSourceSize, + "Check the size of the source when it is created, and add/remove holds") +Rule(UpdateUploadPackage, is_always, size_params, + process.CheckSubmissionSourceSize, + "Check the size of the source when it is updated, and add/remove holds") +Rule(ConfirmPreview, is_always, make_params('PDF_LIMIT_BYTES'), + process.CheckPDFSize, + "Check the size of the PDF when the submitter confirms the preview.") diff --git a/agent/agent/rules/base.py b/agent/agent/rules/base.py new file mode 100644 index 0000000..c0c81f2 --- /dev/null +++ b/agent/agent/rules/base.py @@ -0,0 +1,70 @@ +"""Provides the core representation of a rule.""" + +from typing import Mapping, List, Callable, Dict, Any +from collections import defaultdict +from dataclasses import dataclass + +from arxiv.submission.domain import Submission +from arxiv.submission.domain.event import Event, EventType +from ..process import ProcessType + +Condition = Callable[[Event, Submission, Submission], bool] +ParamFunc = Callable[[Event, Submission, Submission], Dict[str, Any]] + +REGISTRY: Mapping[EventType, List['Rule']] = defaultdict(list) +""" +Registry for :class:`.Rule` instances. + +We keep a reference here so that we can easily look up all rules that apply +to a particular event type. +""" + + +@dataclass +class Rule: + """ + Represents an event rule for a process. + + A **rule** defines the circumstances under which a process should be + carried out. 
Specifically, a rule is associated with a particular type of + event, and a function that determines whether the process should be carried + out based on the event properties and/or the state of the submission. + """ + + event_type: EventType + """The event type (class) on which the rule should be evaluated.""" + + condition: Condition + """ + Conditions under which to run :attr:`.process`. + + This is a function that evaluates the event, and the state of the + submission prior to and after the event. It is only called for an event of + type :attr:`.event_type`. If the function returns True, then + :attr:`.process` should be carried out. + """ + + params: ParamFunc + """ + Provides the runtime configuration parameters for :attr:`.process`. + + This is a function that evaluates the event, and the state of the + submission prior to and after the event, and returns a dict of + configuration parameters. Those parameters are passed to the process when + it is run. + """ + + process: ProcessType + """ + The process (class) to carry out. + + This should be carried out iff :attr:`.event_type` and :attr:`.condition` + are satisfied. 
+ """ + + name: str + """The name of the process.""" + + def __post_init__(self): + """Register this instance.""" + REGISTRY[self.event_type].append(self) diff --git a/agent/agent/rules/conditions.py b/agent/agent/rules/conditions.py new file mode 100644 index 0000000..0e3d818 --- /dev/null +++ b/agent/agent/rules/conditions.py @@ -0,0 +1,32 @@ +"""Rule condition helpers.""" + +from arxiv.submission.domain.event import Event, AddFeature +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.agent import Agent, User, System +from arxiv.submission.domain.annotation import Feature + +from .base import Condition + + +def is_system_event(event: Event, before: Submission, + after: Submission) -> bool: + """Only for system-created events.""" + return type(event.creator) is System + + +def is_user_event(event: Event, before: Submission, after: Submission) -> bool: + """Only for user-created events.""" + return type(event.creator) is User + + +def is_always(event: Event, before: Submission, after: Submission) -> bool: + """Return ``True``. 
Always means always.""" + return True + + +def is_feature_type(feature_type: Feature.Type) -> Condition: + """Generate a condition based on feature type.""" + def condition(event: AddFeature, before: Submission, + after: Submission) -> bool: + return event.feature_type is feature_type + return condition diff --git a/agent/agent/rules/params.py b/agent/agent/rules/params.py new file mode 100644 index 0000000..64db2c6 --- /dev/null +++ b/agent/agent/rules/params.py @@ -0,0 +1,20 @@ +"""Rule param-getter helpers.""" + +from typing import Dict, Any + +from arxiv.base.globals import get_application_config + +from .base import ParamFunc + + +def empty_params(*args, **kwargs) -> Dict[str, Any]: + """Return an empty dict.""" + return {} + + +def make_params(*variables: str) -> ParamFunc: + """Make a param-getting function from config variables.""" + def params(*args, **kwargs): + config = get_application_config() + return {variable: config[variable] for variable in variables} + return params diff --git a/agent/agent/rules/tests/__init__.py b/agent/agent/rules/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agent/agent/rules/tests/test_rules.py b/agent/agent/rules/tests/test_rules.py new file mode 100644 index 0000000..6b378bf --- /dev/null +++ b/agent/agent/rules/tests/test_rules.py @@ -0,0 +1,123 @@ +"""Test that expected processes are called under different conditions.""" + +from unittest import TestCase, mock +from datetime import datetime +from pytz import UTC, timezone + +from arxiv.submission.domain.event import FinalizeSubmission, ConfirmPreview +from arxiv.submission.domain.agent import User, System +from arxiv.submission.domain.submission import Submission, SubmissionContent, \ + SubmissionMetadata, Classification + + +from ...services import database +from .. import process +from ...domain import Trigger +from ...factory import create_app +from ... 
import rules + + +class TestFinalizeSubmission(TestCase): + """Test that expected rules are triggered in response to events.""" + + def setUp(self): + """We have a submission.""" + self.creator = User(native_id=1234, email='something@else.com', + forename='Ross', surname='Perot') + self.app = create_app() + + self.before = mock.MagicMock( + submission_id=12345, + metadata=mock.MagicMock( + title="The best title", + authors_display="Frank Underwood (POTUS)", + abstract="Pork loin meatloaf meatball in cow et. Tail pork ut velit, eu prosciutto pork chop pariatur ad non hamburger bacon cupidatat. Short loin nulla aute esse spare ribs eiusmod consequat anim capicola chuck cupim labore alcatra strip steak tail. Lorem short ribs andouille leberkas pork belly. Andouille fatback ham hock burgdoggen, ham pork belly labore doner aute esse.", + comments="Aliqua ham capicola minim filet mignon tenderloin voluptate bacon biltong shank in chuck do pig in. Id pariatur jowl ad ham pork chop doner buffalo laboris sed ut", + msc_class="14J60 (Primary), 14F05, 14J26 (Secondary)", + acm_class="F.2.2; I.2.7", + journal_ref="Nature 2021 39202:32-12", + report_num="Report 1234", + doi="10.00123/43463" + ), + source_content=mock.MagicMock(uncompressed_size=392019), + version=1, + primary_classification=mock.MagicMock(category='cs.DL'), + license=mock.MagicMock(uri='http://some.license/v2'), + creator=self.creator, + owner=self.creator, + created=datetime(2018, 3, 4, 18, 34, 2, tzinfo=UTC), + submitted=datetime(2018, 3, 4, 19, 34, 2, tzinfo=UTC), + finalized=False + ) + self.after = mock.MagicMock( + submission_id=12345, + metadata=mock.MagicMock( + title="The best title", + authors_display="Frank Underwood (POTUS)", + abstract="Pork loin meatloaf meatball in cow et. Tail pork ut velit, eu prosciutto pork chop pariatur ad non hamburger bacon cupidatat. Short loin nulla aute esse spare ribs eiusmod consequat anim capicola chuck cupim labore alcatra strip steak tail. 
Lorem short ribs andouille leberkas pork belly. Andouille fatback ham hock burgdoggen, ham pork belly labore doner aute esse.", + comments="Aliqua ham capicola minim filet mignon tenderloin voluptate bacon biltong shank in chuck do pig in. Id pariatur jowl ad ham pork chop doner buffalo laboris sed ut", + msc_class="14J60 (Primary), 14F05, 14J26 (Secondary)", + acm_class="F.2.2; I.2.7", + journal_ref="Nature 2021 39202:32-12", + report_num="Report 1234", + doi="10.00123/43463" + ), + source_content=mock.MagicMock(uncompressed_size=392019), + version=1, + primary_classification=mock.MagicMock(category='cs.DL'), + license=mock.MagicMock(uri='http://some.license/v2'), + creator=self.creator, + owner=self.creator, + created=datetime(2018, 3, 4, 18, 34, 2, tzinfo=UTC), + submitted=datetime(2018, 3, 4, 19, 34, 2, tzinfo=UTC), + finalized=True + ) + + def _get_call_data(self, mock_Runner): + passed = [] + triggers = {} + for i in range(0, len(mock_Runner.mock_calls), 2): + name, args, kwargs = mock_Runner.mock_calls[i] + self.assertEqual(name, '', 'First call is to constructor') + process_inst = args[0] + self.assertIsInstance(process_inst, process.Process, + 'Process is passed') + passed.append(type(process_inst)) + + name, args, kwargs = mock_Runner.mock_calls[i+1] + self.assertEqual(name, '().run', 'Second call is to run method') + trigger = args[0] + self.assertIsInstance(trigger, Trigger, 'Trigger is passed') + triggers[type(process_inst)] = trigger + return passed, triggers + + def test_confirm_preview(self): + """The submitter confirms their preview.""" + event = ConfirmPreview(creator=self.creator, created=datetime.now(UTC)) + + with self.app.app_context(): + passed, configs = [], [] + for proc, conf in rules.evaluate(event, self.before, self.after): + passed.append(type(proc)) + configs.append(conf) + + self.assertIn(process.RunAutoclassifier, passed, + 'Autoclassifier process is started') + self.assertIn(process.CheckPDFSize, passed, + 'PDF size check is 
started') + + def test_finalize(self): + """The submission is finalized.""" + event = FinalizeSubmission(creator=self.creator, + created=datetime.now(UTC)) + + with self.app.app_context(): + passed, configs = [], [] + for proc, conf in rules.evaluate(event, self.before, self.after): + passed.append(type(proc)) + configs.append(conf) + + self.assertIn(process.SendConfirmationEmail, passed, + 'Email confirmation process is started') + self.assertIn(process.ProposeCrossListFromPrimaryCategory, passed, + 'Propose cross-list process is started') diff --git a/agent/agent/runner/__init__.py b/agent/agent/runner/__init__.py new file mode 100644 index 0000000..704f6bc --- /dev/null +++ b/agent/agent/runner/__init__.py @@ -0,0 +1,4 @@ +"""Provides process runners, which carry out the steps of a process.""" + +from .base import ProcessRunner +from .async_runner import AsyncProcessRunner diff --git a/agent/agent/runner/async_runner.py b/agent/agent/runner/async_runner.py new file mode 100644 index 0000000..fb1100e --- /dev/null +++ b/agent/agent/runner/async_runner.py @@ -0,0 +1,307 @@ +""" +Provides an asynchronous process runner. + +The :class:`.AsyncProcessRunner` registers and dispatches processes via +Celery, to be carried out by the worker. +""" + +from typing import Callable, Optional, Iterable, Tuple, Any, Union, Dict, \ + List +from functools import wraps, partial +import math +import random + +from flask import Flask +from celery import shared_task, Celery, Task, chain +from celery.result import AsyncResult +from retry import retry + +from arxiv.base.globals import get_application_config, get_application_global +from arxiv.base import logging +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.event import Event +from arxiv.submission.domain.agent import Agent +from arxiv.submission import save +from arxiv.submission.exceptions import NothingToDo +from arxiv.submission.services import classic +from .. 
import config + +from .base import ProcessRunner +from ..process import ProcessType, Process, Failed, Recoverable +from ..domain import ProcessData, Trigger + +logger = logging.getLogger(__name__) +logger.propagate = False + + +class AsyncProcessRunner(ProcessRunner): + """ + Runs :class:`.Process` instances asynchronously. + + In order for this to work at runtime, :meth:`.prepare` *MUST* be called + with the process class at import time in both the process that dispatches + tasks (e.g. the event consumer) and the worker process. + """ + + processes = {} + + @classmethod + def prepare(cls, ProcessImpl: ProcessType) -> None: + """ + Register an :class:`.ProcessType` for asynchronous execution. + + Parameters + ---------- + ProcessImpl : :class:`.ProcessType` + The process (class) to register. + + """ + cls.processes[ProcessImpl.__name__] = register_process(ProcessImpl) + + def run(self, trigger: Trigger) -> None: + """Run a :class:`.Process` asynchronously.""" + _run = self.processes[self.process.name] + _run(self.process.submission_id, self.process.process_id, trigger) + + +def create_worker_app() -> Celery: + """ + Initialize the worker application. 
+ + Returns + ------- + :class:`celery.Celery` + + """ + result_backend = config.RESULT_BACKEND + broker = config.BROKER_URL + celery_app = Celery('submission', + results=result_backend, + backend=result_backend, + result_backend=result_backend, + broker=broker) + + celery_app.conf.queue_name_prefix = config.QUEUE_NAME_PREFIX + celery_app.conf.task_default_queue = config.TASK_DEFAULT_QUEUE + celery_app.conf.prefetch_multiplier = config.PREFETCH_MULTIPLIER + celery_app.conf.task_acks_late = config.TASK_ACKS_LATE + celery_app.conf.accept_content = config.CELERY_ACCEPT_CONTENT + celery_app.conf.task_serializer = config.CELERY_TASK_SERIALIZER + celery_app.conf.result_serializer = config.CELERY_RESULT_SERIALIZER + celery_app.conf.backend = result_backend + + register_save = celery_app.task( + name='save', + bind=True, + max_retries=config.MAX_SAVE_RETRIES, + default_retry_delay=config.DEFAULT_SAVE_RETRY_DELAY + ) + register_save(async_save) + return celery_app + + +def get_or_create_worker_app() -> Celery: + """ + Get the current worker app, or create one. + + Uses the Flask application global to keep track of the worker app. + """ + g = get_application_global() + if not g: + return create_worker_app() + if 'worker' not in g: + g.worker = create_worker_app() + return g.worker + + +@retry(Recoverable, backoff=2) +def async_save(self, *events: Event, submission_id: int = -1) -> None: + """ + Save/emit new events. + + Parameters + ---------- + events + Each item is an :class:`.Event`. + submission_id : int + Identifier of the submission to which the commands/events apply. 
+ + """ + if submission_id < 0: + raise RuntimeError('Invalid submission ID') + try: + save(*events, submission_id=submission_id) + except NothingToDo as e: + logger.debug('No events to save, move along: %s', e) + except classic.Unavailable as e: + raise Recoverable('Database is not available; try again') from e + except classic.ConsistencyError as e: + logger.error('Encountered a ConsistencyError; could not save: %s', e) + raise Failed('Encountered a consistency error') from e + except Exception as e: + raise Failed('Unhandled exception: %s' % e) from e + + +def execute_async_save(*events: Event, submission_id: int = -1): + """ + Save events asynchronously, using :func:`.async_save`. + + Parameters + ---------- + events + Each item is an :class:`.Event`. + submission_id : int + Identifier of the submission to which the commands/events apply. + + """ + if submission_id < 0: + raise RuntimeError('Invalid submission ID') + kwargs = {'submission_id': submission_id} + get_or_create_worker_app().send_task('save', (*events,), kwargs) + + +def make_countdown(delay: int, backoff: Optional[int] = 1, + max_delay: Optional[int] = None, + jitter: Union[int, Tuple[int, int]] = 0) \ + -> Callable[[int], Union[int, float]]: + """ + Make a countdown callable based on the retry parameters of a step. + + For use in task retry calls, to customize the retry delay. + + Parameters + ---------- + delay : int + The base number of seconds to wait before retrying. + backoff : int + If provided, the exponent applied to the delay * number of attempts. + max_delay : int + If provided, the maximum number of seconds to wait. + jitter : int or tuple + If an int, number of seconds (+/-) to alter the delay, after the + backoff is applied. If a two-tuple of ints, + a random offset will be used in the range (jitter[0], jitter[1]). + + Returns + ------- + function + Countdown function; accepts a single int parameter representing the + current retry count. 
+ + """ + if max_delay is None: + max_delay = math.inf + + def countdown(retries: int) -> int: + this_delay = delay if backoff is None else (delay * retries) ** backoff + if jitter: + if type(jitter) is int: + this_delay += jitter + elif type(jitter) is tuple and len(jitter) == 2: + n, x = jitter + this_delay += random.random() * (x - n) + x + return min(this_delay, max_delay) + return countdown + + +def make_task(app: Celery, Proc: ProcessType, step: Callable) -> Task: + """ + Generate an asynchronous task to perform a step. + + Parameters + ---------- + app : :class:`celery.Celery` + The Celery application on which to register the task. + Proc : :class:`.ProcessType` + The process (class) for which to make a task. + step : function + The specific step of the process to make into a task. + + Returns + ------- + :class:`celery.Task` + An asynchronous task that performs ``step``. + + """ + countdown = make_countdown(step.delay, step.backoff, step.max_delay) + + @app.task(name=f'{Proc.__name__}.{step.name}', bind=True, + max_retries=step.max_retries, default_retry_delay=step.delay) + def do_step(self, data: ProcessData) -> Any: + logger.debug('Do step %s', step.name) + emit = partial(async_save, self, submission_id=data.submission_id) + previous = data.get_last_result() if data.results else None + try: + inst = Proc(data.submission_id, data.process_id) + data.add_result(step(inst, previous, data.trigger, emit)) + except Failed as exc: + raise exc # This is a deliberately unrecoverable failure. + except Exception as exc: + # Any other exception deserves more chances. + self.retry(exc=exc, countdown=countdown(self.request.retries)) + return data + return do_step + + +def make_failure_task(app: Celery, Proc: ProcessType) -> Task: + """ + Make a :class:`.Task` to handle failure of a process. + + Parameters + ---------- + app : :class:`celery.Celery` + The Celery application on which to register the task. 
+ Proc : :class:`.ProcessType` + The process (class) for which the failure handler should be + generated. + + Returns + ------- + :class:`celery.Task` + An asynchronous task that can be used as an errback for an async + process. + + """ + @app.task + def on_failure(request, exc, traceback): # type: ignore + data, = request.args + name = getattr(exc, 'step_name', 'none') + events = [] + process = Proc(data.submission_id, data.process_id) + process.on_failure(name, data.trigger, events.append) + execute_async_save(*events, submission_id=data.submission_id) + return on_failure + + +def register_process(Proc: ProcessType) -> Callable: + """ + Generate an asynchronous variant of a :class:`.Process`. + + Builds a chain of asynchronous :class:`celery.Task`s from the steps in the + process. + + Parameters + ---------- + Proc : :class:`.ProcessType` + The process (class) to be registered for asynchronous execution. + + Returns + ------- + function + A function that, when called, dispatches the process for asynchronous + execution. + + """ + app = get_or_create_worker_app() + + # make_step_task() will build and register an asynchronous variant of the + # step (callable). 
+ process = chain(*[make_task(app, Proc, step).s() for step in Proc.steps]) + on_failure = make_failure_task(app, Proc) + + def execute_chain(submission_id: int, process_id: str, trigger: Trigger): + logger.debug('Execute chain %s with id %s for submission %s', + Proc.__name__, process_id, submission_id) + data = ProcessData(submission_id, process_id, trigger, []) + process.apply_async((data,), link_error=on_failure.s()) + return execute_chain diff --git a/agent/agent/runner/base.py b/agent/agent/runner/base.py new file mode 100644 index 0000000..b679a05 --- /dev/null +++ b/agent/agent/runner/base.py @@ -0,0 +1,60 @@ +"""Provides a base implementation of a process runner.""" + +from typing import Optional, Any, Callable + +from retry.api import retry_call + +from arxiv.submission.domain.submission import Submission +from arxiv.submission.domain.event import Event +from arxiv.submission.domain.agent import Agent +from arxiv.submission import save +from arxiv.base import logging + +from ..process import Process, Failed, Recoverable, Retry +from ..domain import Trigger, ProcessData + +logger = logging.getLogger(__name__) +logger.propagate = False + + +class ProcessRunner: + """Basic implementation of a synchronous process runner.""" + + def __init__(self, process: Process) -> None: + """Initialize with a :class:`.Process`.""" + self.process = process + + def do(self, step_name: str, previous: Any, trigger: Trigger, + emit: Callable) -> Any: + """Perform a step with configured retrying.""" + step = getattr(self.process, step_name) + + def _do_step(previous, trigger, emit): + try: + return step(previous, trigger, emit) + except Failed as e: + raise e + except Exception as e: + raise Recoverable() from e + return retry_call(_do_step, fargs=(previous, trigger, emit), + exceptions=(Recoverable,), tries=step.max_retries, + delay=step.delay, backoff=step.backoff, + max_delay=step.max_delay, jitter=step.jitter) + + def run(self, trigger: Trigger) -> None: + """Execute the 
process synchronously.""" + events = [] + self.process.before_start(trigger, events.append) + result = None + logger.debug('%s started', self.process.name) + for step in self.process.steps: + try: + result = self.do(step.name, result, trigger, events.append) + self.process.on_success(step.name, trigger, events.append) + logger.debug('%s:%s succeeded', self.process.name, step.name) + except Exception: + self.process.on_failure(step.name, trigger, events.append) + logger.debug('%s:%s failed', self.process.name, step.name) + finally: + save(*events, submission_id=self.process.submission_id) + events.clear() diff --git a/agent/agent/runner/tests/__init__.py b/agent/agent/runner/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agent/agent/runner/tests/test_run_process.py b/agent/agent/runner/tests/test_run_process.py new file mode 100644 index 0000000..b30a087 --- /dev/null +++ b/agent/agent/runner/tests/test_run_process.py @@ -0,0 +1,101 @@ +"""Test running processes.""" + +from unittest import TestCase, mock + +from ... 
import process +from ...runner import base, async_runner + + +class TestProcess(TestCase): + """Test running a process synchronously.""" + + def setUp(self): + """Given a synchronous process.""" + class FooProcess(process.Process): + @process.step() + def step_a(self, previous, trigger, emit): + return trigger.event.some_value + 1 + + @process.step() + def step_b(self, previous, trigger, emit): + return (previous + 1) ** 2 + + @process.step() + def step_c(self, previous, trigger, emit): + if previous > 20: + self.fail(message='fail like it\'s 1999') + return (previous + 1) ** 2 + + self.FooProcess = FooProcess + self.submission_id = 24543 + self.trigger = { + 'event': mock.MagicMock(submission_id=self.submission_id), + } + + @mock.patch(f'{process.__name__}.base.AddProcessStatus', mock.MagicMock) + @mock.patch(f'{base.__name__}.save') + def test_call(self, mock_save): + """Calling the process runs all steps in order.""" + saved_events = [] + + def append_events(*events, submission_id=None): + for event in events: + saved_events.append((event, submission_id)) + + mock_save.side_effect = append_events + + proc = self.FooProcess(self.submission_id) + runner = base.ProcessRunner(proc) + runner.run(mock.MagicMock(event=mock.MagicMock(some_value=1))) + + self.assertEqual(saved_events[0][0].status, + process.Process.Status.PENDING) + self.assertEqual(saved_events[0][1], self.submission_id) + self.assertEqual(saved_events[1][0].status, + process.Process.Status.IN_PROGRESS) + self.assertEqual(saved_events[1][0].step, 'step_a') + self.assertEqual(saved_events[1][1], self.submission_id) + + self.assertEqual(saved_events[2][0].status, + process.Process.Status.IN_PROGRESS) + self.assertEqual(saved_events[2][0].step, 'step_b') + self.assertEqual(saved_events[2][1], self.submission_id) + + self.assertEqual(saved_events[3][0].status, + process.Process.Status.SUCCEEDED) + self.assertEqual(saved_events[3][0].step, 'step_c') + self.assertEqual(saved_events[3][1], self.submission_id) 
+ + @mock.patch(f'{process.__name__}.base.AddProcessStatus', mock.MagicMock) + @mock.patch(f'{base.__name__}.save') + def test_failing_process(self, mock_save): + """Calling the process runs all steps in order, but one fails.""" + saved_events = [] + + def append_events(*events, submission_id=None): + for event in events: + saved_events.append((event, submission_id)) + + mock_save.side_effect = append_events + + proc = self.FooProcess(self.submission_id) + runner = base.ProcessRunner(proc) + runner.run(mock.MagicMock(event=mock.MagicMock(some_value=5))) + + self.assertEqual(saved_events[0][0].status, + process.Process.Status.PENDING) + self.assertEqual(saved_events[0][1], self.submission_id) + self.assertEqual(saved_events[1][0].status, + process.Process.Status.IN_PROGRESS) + self.assertEqual(saved_events[1][0].step, 'step_a') + self.assertEqual(saved_events[1][1], self.submission_id) + + self.assertEqual(saved_events[2][0].status, + process.Process.Status.IN_PROGRESS) + self.assertEqual(saved_events[2][0].step, 'step_b') + self.assertEqual(saved_events[2][1], self.submission_id) + + self.assertEqual(saved_events[3][0].status, + process.Process.Status.FAILED) + self.assertEqual(saved_events[3][0].step, 'step_c') + self.assertEqual(saved_events[3][1], self.submission_id) diff --git a/agent/agent/serializer.py b/agent/agent/serializer.py new file mode 100644 index 0000000..83b6e7e --- /dev/null +++ b/agent/agent/serializer.py @@ -0,0 +1,56 @@ +"""JSON serialization for submission core.""" + +from typing import Any, Union, List +import json +from datetime import datetime, date +from dataclasses import asdict +from enum import Enum +from importlib import import_module +from .domain import Trigger, ProcessData + +from arxiv.submission.serializer import EventJSONEncoder, EventJSONDecoder + + +class ProcessJSONEncoder(EventJSONEncoder): + """Encodes domain objects in this package for serialization.""" + + def default(self, obj): + """Serialize objects in this application 
domain.""" + if type(obj) is Trigger: + data = {'__type__': 'Trigger', + '__data__': asdict(obj)} + elif type(obj) is ProcessData: + data = {'__type__': 'ProcessData', + '__data__': asdict(obj)} + else: + data = super(ProcessJSONEncoder, self).default(obj) + return data + + +class ProcessJSONDecoder(EventJSONDecoder): + """Decode :class:`.Trigger` and other domain objects from JSON data.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Pass :func:`object_hook` to the base constructor.""" + kwargs['object_hook'] = kwargs.get('object_hook', self.object_hook) + super(ProcessJSONDecoder, self).__init__(*args, **kwargs) + + def object_hook(self, obj: dict, **extra: Any) -> Any: + """Decode domain objects in this package.""" + obj = super(ProcessJSONDecoder, self).object_hook(obj, **extra) + if isinstance(obj, dict) and '__type__' in obj: + if obj['__type__'] == 'Trigger': + return Trigger(**obj['__data__']) + elif obj['__type__'] == 'ProcessData': + return ProcessData(**obj['__data__']) + return obj + + +def dumps(obj: Any) -> str: + """Generate JSON from a Python object.""" + return json.dumps(obj, cls=ProcessJSONEncoder) + + +def loads(data: str) -> Any: + """Load a Python object from JSON.""" + return json.loads(data, cls=ProcessJSONDecoder) diff --git a/agent/agent/services/__init__.py b/agent/agent/services/__init__.py new file mode 100644 index 0000000..5c1157d --- /dev/null +++ b/agent/agent/services/__init__.py @@ -0,0 +1 @@ +"""Service integrations.""" diff --git a/agent/agent/services/database.py b/agent/agent/services/database.py new file mode 100644 index 0000000..671b01f --- /dev/null +++ b/agent/agent/services/database.py @@ -0,0 +1,167 @@ +"""Lightweight database integration for checkpointing.""" + +from typing import Optional, Any +from datetime import datetime +from pytz import UTC +import time + +from flask import Flask +from flask_sqlalchemy import SQLAlchemy + +from sqlalchemy import BigInteger, Column, DateTime, Enum, ForeignKey, \ 
+ ForeignKeyConstraint, Index, \ + Integer, SmallInteger, String, Table, text, Text +from sqlalchemy.dialects.mysql import DATETIME +from sqlalchemy.exc import OperationalError +from retry import retry + +from arxiv.base import logging +from arxiv.submission.domain.event import AddProcessStatus +from ..rules import Rule +from ..process import Process + +db: SQLAlchemy = SQLAlchemy() +logger = logging.getLogger(__name__) +logger.propagate = False + + +class Checkpoint(db.Model): + """Stores checkpoint information for the Kinesis consumer.""" + + __tablename__ = 'checkpoint' + __bind_key__ = 'agent' + + id = Column(Integer, primary_key=True) + position = Column(String(255), index=True, nullable=False) + created = Column(DATETIME(6), default=lambda: datetime.now(UTC)) + shard_id = Column(String(255), index=True, nullable=False) + + +class ProcessStatusEvent(db.Model): + """Stores events related to processes.""" + + __tablename__ = 'process_status_events' + __bind_key__ = 'agent' + + id = Column(Integer, primary_key=True) + created = Column(DATETIME(6), index=True, nullable=False) + received = Column(DATETIME(6), index=True, nullable=False, + default=lambda: datetime.now(UTC)) + event_id = Column(String(255), index=True, nullable=False) + submission_id = Column(Integer, index=True) + process_id = Column(String(100), index=True, nullable=False) + process = Column(String(100), index=True, nullable=False) + status = Column(String(50), index=True, nullable=True) + reason = Column(Text, nullable=True) + agent_type = Column(Enum('System', 'User', 'Client'), index=True, + nullable=False) + agent_id = Column(String(100), index=True, nullable=False) + + +def init_app(app: Flask) -> None: + """Set configuration defaults and attach session to the application.""" + db.init_app(app) + + @app.teardown_request + def teardown_request(exception) -> None: + if exception: + db.session.rollback() + db.session.remove() + + @app.teardown_appcontext + def teardown_appcontext(*args, **kwargs) 
-> None: + db.session.rollback() + db.session.remove() + + +def create_all() -> None: + """Create all tables in the agent database.""" + db.create_all(bind='agent') + + +def tables_exist() -> bool: + """Determine whether or not these database tables exist.""" + try: + db.session.query("1").from_statement(text("SELECT 1 FROM checkpoint limit 1")).all() + db.session.query("1").from_statement(text("SELECT 1 FROM process_status_events limit 1")).all() + except Exception as e: + return False + return True + # return db.engine.dialect.has_table(db.engine, 'checkpoint') + + +class Unavailable(IOError): + """The database is not available.""" + + +@retry(Unavailable, tries=3, backoff=2) +def get_latest_position(shard_id: str) -> str: + """Get the latest checkpointed position.""" + try: + result = db.session.query(Checkpoint.position) \ + .filter(Checkpoint.shard_id == shard_id) \ + .order_by(Checkpoint.id.desc()) \ + .first() + if result is None: + return + position, = result + except OperationalError as e: + raise Unavailable('Caught op error') from e + return position + + +@retry(Unavailable, tries=3, backoff=2) +def store_position(position: str, shard_id: str) -> None: + """Store a new checkpoint position.""" + try: + db.session.add(Checkpoint(position=position, shard_id=shard_id)) + db.session.commit() + except OperationalError as e: + db.session.rollback() + raise Unavailable('Caught op error') from e + + +def store_event(event: AddProcessStatus) -> None: + """Store an :class:`.AddProcessStatus` event.""" + try: + db.session.add(ProcessStatusEvent( + created=event.created, + event_id=event.event_id, + submission_id=event.submission_id, + process_id=event.process_id, + process=event.process, + status=event.status, + reason=event.reason, + agent_type=event.creator.agent_type, + agent_id=event.creator.native_id + )) + db.session.commit() + except OperationalError as e: + db.session.rollback() + raise Unavailable('Caught op error') from e + + +def 
await_connection(max_wait: int = -1) -> None: + """Wait for the database to be available.""" + logger.info('Waiting for database server to be available') + wait = 2 + start = time.time() + while True: + if max_wait > 0 and time.time() - start >= max_wait: + raise Unavailable('Failed to connect in %i seconds', max_wait) + + if is_available(): + break + logger.info(f'...waiting {wait} seconds...') + time.sleep(wait) + wait *= 2 + + +def is_available(**kwargs: Any) -> bool: + """Check our connection to the database.""" + try: + db.session.query("1").from_statement(text("SELECT 1")).all() + except Exception as e: + logger.error('Encountered an error talking to database: %s', e) + return False + return True diff --git a/agent/agent/tasks.py b/agent/agent/tasks.py new file mode 100644 index 0000000..0b4ce99 --- /dev/null +++ b/agent/agent/tasks.py @@ -0,0 +1,4 @@ +from .runner import AsyncProcessRunner +from .foo import FooProcess + +AsyncProcessRunner.prepare(FooProcess) diff --git a/agent/agent/tests/__init__.py b/agent/agent/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/agent/agent/tests/test_serializer.py b/agent/agent/tests/test_serializer.py new file mode 100644 index 0000000..ec1a7c1 --- /dev/null +++ b/agent/agent/tests/test_serializer.py @@ -0,0 +1,48 @@ +from unittest import TestCase +from datetime import datetime +from pytz import UTC +from arxiv.submission import Submission, SetTitle, User, \ + SubmissionMetadata +from ..domain import ProcessData, Trigger + +from ..serializer import dumps, loads + + +class TestSerialize(TestCase): + def test_serialize_trigger(self): + """Serialize and deserialize a :class:`.Trigger`.""" + creator = User(1234, username='foo', email='foo@bar.com') + event = SetTitle(creator=creator, title='the title', + created=datetime.now(UTC)) + trigger = Trigger( + actor=creator, + event=event, + before=Submission(creator=creator, created=event.created, + owner=creator), + after=Submission(creator=creator, 
created=event.created, + owner=creator, + metadata=SubmissionMetadata(title='the title')), + ) + deserialized = loads(dumps(trigger)) + self.assertIsInstance(deserialized, Trigger) + self.assertEqual(deserialized, trigger) + + def test_serialize_processdata(self): + """Serialize and deserialize a :class:`.ProcessData`.""" + creator = User(1234, username='foo', email='foo@bar.com') + event = SetTitle(creator=creator, title='the title', + created=datetime.now(UTC)) + trigger = Trigger( + actor=creator, + event=event, + before=Submission(creator=creator, created=event.created, + owner=creator), + after=Submission(creator=creator, created=event.created, + owner=creator, + metadata=SubmissionMetadata(title='the title')), + ) + data = ProcessData(submission_id=2, process_id='fooid', + trigger=trigger, results=[1, 'a']) + deserialized = loads(dumps(data)) + self.assertIsInstance(deserialized, ProcessData) + self.assertEqual(deserialized, data) diff --git a/agent/agent/worker.py b/agent/agent/worker.py new file mode 100644 index 0000000..756b6ff --- /dev/null +++ b/agent/agent/worker.py @@ -0,0 +1,27 @@ +""" +Submission Agent Worker +======================= + +The worker is a `Celery `_ application that scales +horizontally to run :class:`.Process` instances dispatched by the consumer. +Processes are divided into smaller steps that are run in series. Keeping steps +small makes the overall process more fault-tolerant; if a step fails for some +reason (e.g. worker crashes, meteor strike), it can be retried without +discarding expensive results from previous steps. 
+ + +""" + +from flask import Flask + +from .runner.async_runner import get_or_create_worker_app +from .factory import create_app + +import logging + +logging.getLogger('arxiv.submission.services.classic.interpolate') \ + .setLevel(logging.ERROR) + +app = create_app() +app.app_context().push() +worker_app = get_or_create_worker_app() diff --git a/agent/deploy/submission-agent/Chart.yaml b/agent/deploy/submission-agent/Chart.yaml new file mode 100644 index 0000000..af0dec2 --- /dev/null +++ b/agent/deploy/submission-agent/Chart.yaml @@ -0,0 +1,7 @@ +name: submission-agent +version: 0.0.1 +appVersion: 0.0.1 +description: Submission agent. +sources: + - https://github.com/arxiv/arxiv-submission-core +engine: gotpl diff --git a/agent/deploy/submission-agent/templates/00-service-account.yaml b/agent/deploy/submission-agent/templates/00-service-account.yaml new file mode 100644 index 0000000..a3c3de0 --- /dev/null +++ b/agent/deploy/submission-agent/templates/00-service-account.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +kind: ServiceAccount +metadata: + namespace: "{{ .Values.namespace }}" + name: "{{ default "submission-agent" .Values.name }}" + labels: + subsystem: "{{ .Values.labels.subsystem }}" + service-group: "{{ .Values.labels.service_group }}" diff --git a/agent/deploy/submission-agent/templates/10-agent-deployment.yaml b/agent/deploy/submission-agent/templates/10-agent-deployment.yaml new file mode 100644 index 0000000..94ccd0d --- /dev/null +++ b/agent/deploy/submission-agent/templates/10-agent-deployment.yaml @@ -0,0 +1,114 @@ +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: "{{ default "submission-agent" .Values.name }}" + namespace: "{{ .Values.namespace }}" + labels: + subsystem: "{{ .Values.labels.subsystem }}" + container: "{{ default "submission-agent" .Values.name }}" + service-group: "{{ .Values.labels.service_group }}" + log-style: python +spec: + replicas: 1 + template: + metadata: + labels: + subsystem: "{{ .Values.labels.subsystem }}" + 
container: "{{ default "submission-agent" .Values.name }}" + service-group: "{{ .Values.labels.service_group }}" + log-style: python + spec: + serviceAccount: "{{ default "submission-agent" .Values.name }}" + volumes: + - name: vault-token + emptyDir: {} + - name: vault-certificate + secret: + secretName: vault-certificate + + containers: + - name: "{{ default "submission-agent" .Values.name }}" + image: "{{ default "arxiv/submission-agent" .Values.image.name }}:{{ default "latest" .Values.image.tag }}" + imagePullPolicy: "{{ default "Always" .Values.imagePullPolicy }}" + env: + - name: LOGLEVEL + value: "{{ .Values.loglevel }}" + - name: SUBMISSION_AGENT_BROKER_URL + value: "redis://{{ .Values.redis.host }}:{{ .Values.redis.port }}/{{ .Values.redis.db }}" + - name: AGENT_DATABASE_HOST + value: "{{ .Values.database.host }}" + + - name: VAULT_ENABLED + value: "1" + - name: VAULT_HOST + value: "{{ .Values.vault.host }}" + - name: VAULT_PORT + value: "{{ .Values.vault.port }}" + - name: VAULT_CERT + value: /etc/vault-certificate/vaulttls.cert.pem + - name: VAULT_ROLE + value: "{{ .Values.vault.role }}-{{ .Values.namespace }}" + - name: VAULT_CREDENTIAL + value: "{{ .Values.vault.credential }}" + - name: KUBE_TOKEN + value: /var/run/secrets/kubernetes.io/serviceaccount/token + - name: NAMESPACE + value: "{{ .Values.namespace }}" + + - name: WAIT_FOR_SERVICES + value: "{{ .Values.config.wait_for_services }}" + - name: WAIT_ON_STARTUP + value: "{{ .Values.config.wait_on_startup }}" + - name: KINESIS_STREAM + value: "{{ .Values.kinesis.stream }}" + - name: KINESIS_SHARD_ID + value: "{{ .Values.kinesis.shard_id }}" + + - name: CLASSIFIER_SERVICE_HOST + value: "{{ .Values.classifier.host }}" + - name: CLASSIFIER_SERVICE_PORT + value: "{{ .Values.classifier.port }}" + - name: CLASSIFIER_{{ .Values.classifier.port }}_PROTO + value: "{{ .Values.classifier.proto }}" + - name: CLASSIFIER_PATH + value: "{{ .Values.classifier.path }}" + - name: CLASSIFIER_PROTO + value: "{{ 
.Values.classifier.proto }}" + + - name: COMPILER_PROTO + value: "{{ .Values.compiler.proto }}" + - name: PLAINTEXT_PROTO + value: "{{ .Values.plaintext.proto }}" + - name: FILEMANAGER_PROTO + value: "{{ .Values.filemanager.proto }}" + + - name: EMAIL_ENABLED + value: "{{ .Values.smtp.enabled }}" + - name: SMTP_HOSTNAME + value: "{{ .Values.smtp.host }}" + - name: SMTP_PORT + value: "{{ .Values.smtp.port }}" + - name: SMTP_USERNAME + value: "{{ .Values.smtp.username }}" + - name: SMTP_PASSWORD + value: "{{ .Values.smtp.password }}" + - name: SMTP_SSL + value: "{{ .Values.smtp.ssl }}" + + - name: SERVER_NAME + value: "{{ .Values.ui.host }}" + + volumeMounts: + - name: vault-certificate + mountPath: /etc/vault-certificate + readOnly: true + - name: vault-token + mountPath: /etc/vault + + resources: + limits: + cpu: 300m + memory: 256Mi + requests: + cpu: 100m + memory: 128Mi diff --git a/agent/deploy/submission-agent/templates/20-worker-deployment.yaml b/agent/deploy/submission-agent/templates/20-worker-deployment.yaml new file mode 100644 index 0000000..5e4aad1 --- /dev/null +++ b/agent/deploy/submission-agent/templates/20-worker-deployment.yaml @@ -0,0 +1,115 @@ +apiVersion: apps/v1beta1 +kind: Deployment +metadata: + name: "{{ default "submission-agent" .Values.name }}-worker" + namespace: "{{ .Values.namespace }}" + labels: + subsystem: "{{ .Values.labels.subsystem }}" + container: "{{ default "submission-agent" .Values.name }}-worker" + service-group: "{{ .Values.labels.service_group }}" + log-style: python +spec: + replicas: 1 + template: + metadata: + labels: + subsystem: "{{ .Values.labels.subsystem }}" + container: "{{ default "submission-agent" .Values.name }}-worker" + service-group: "{{ .Values.labels.service_group }}" + log-style: python + spec: + serviceAccount: "{{ default "submission-agent" .Values.name }}" + volumes: + - name: vault-token + emptyDir: {} + - name: vault-certificate + secret: + secretName: vault-certificate + + containers: + - name: 
"{{ default "submission-agent" .Values.name }}-worker" + image: "arxiv/submission-agent:{{ default "latest" .Values.image.tag }}" + command: ["celery", "worker", "-A", "agent.worker.worker_app", "--loglevel=INFO", "-E", "--concurrency=2"] + imagePullPolicy: "{{ default "Always" .Values.imagePullPolicy }}" + env: + - name: LOGLEVEL + value: "{{ .Values.loglevel }}" + - name: SUBMISSION_AGENT_BROKER_URL + value: "redis://{{ .Values.redis.host }}:{{ .Values.redis.port }}/{{ .Values.redis.db }}" + - name: AGENT_DATABASE_HOST + value: "{{ .Values.database.host }}" + + - name: VAULT_ENABLED + value: "1" + - name: VAULT_HOST + value: "{{ .Values.vault.host }}" + - name: VAULT_PORT + value: "{{ .Values.vault.port }}" + - name: VAULT_CERT + value: /etc/vault-certificate/vaulttls.cert.pem + - name: VAULT_ROLE + value: "{{ .Values.vault.role }}-{{ .Values.namespace }}" + - name: VAULT_CREDENTIAL + value: "{{ .Values.vault.credential }}" + - name: KUBE_TOKEN + value: /var/run/secrets/kubernetes.io/serviceaccount/token + - name: NAMESPACE + value: "{{ .Values.namespace }}" + + - name: WAIT_FOR_SERVICES + value: "{{ .Values.config.wait_for_services }}" + - name: WAIT_ON_STARTUP + value: "{{ .Values.config.wait_on_startup }}" + - name: KINESIS_STREAM + value: "{{ .Values.kinesis.stream }}" + - name: KINESIS_SHARD_ID + value: "{{ .Values.kinesis.shard_id }}" + + - name: CLASSIFIER_SERVICE_HOST + value: "{{ .Values.classifier.host }}" + - name: CLASSIFIER_SERVICE_PORT + value: "{{ .Values.classifier.port }}" + - name: CLASSIFIER_{{ .Values.classifier.port }}_PROTO + value: "{{ .Values.classifier.proto }}" + - name: CLASSIFIER_PATH + value: "{{ .Values.classifier.path }}" + - name: CLASSIFIER_PROTO + value: "{{ .Values.classifier.proto }}" + + - name: COMPILER_PROTO + value: "{{ .Values.compiler.proto }}" + - name: PLAINTEXT_PROTO + value: "{{ .Values.plaintext.proto }}" + - name: FILEMANAGER_PROTO + value: "{{ .Values.filemanager.proto }}" + + - name: EMAIL_ENABLED + value: "{{ 
.Values.smtp.enabled }}" + - name: SMTP_HOSTNAME + value: "{{ .Values.smtp.host }}" + - name: SMTP_PORT + value: "{{ .Values.smtp.port }}" + - name: SMTP_USERNAME + value: "{{ .Values.smtp.username }}" + - name: SMTP_PASSWORD + value: "{{ .Values.smtp.password }}" + - name: SMTP_SSL + value: "{{ .Values.smtp.ssl }}" + + - name: SERVER_NAME + value: "{{ .Values.ui.host }}" + + volumeMounts: + - name: vault-certificate + mountPath: /etc/vault-certificate + readOnly: true + - name: vault-token + mountPath: /etc/vault + + resources: + limits: + cpu: 1000m + memory: 1028Mi + requests: + cpu: 500m + memory: 512Mi diff --git a/agent/deploy/submission-agent/values.yaml b/agent/deploy/submission-agent/values.yaml new file mode 100644 index 0000000..afbacac --- /dev/null +++ b/agent/deploy/submission-agent/values.yaml @@ -0,0 +1,65 @@ +name: submission-agent +namespace: development +loglevel: 20 +image: + name: "arxiv/submission-agent" + tag: "0.0" + +labels: + subsystem: submission-moderation + service_group: backend + + +redis: + host: localhost + port: 6379 + db: 0 + +vault: + host: changeme + port: 8200 + role: submission-agent + credential: submission-agent + + +config: + wait_for_services: 1 + wait_on_startup: 5 + +kinesis: + stream: SubmissionEvents-development + shard_id: 0 + +classifier: + host: localhost + port: 8000 + proto: http + path: + +compiler: + proto: http + +plaintext: + proto: http + +filemanager: + proto: http + +smtp: + enabled: 0 + host: localhost + port: 0 + username: foouser + password: foopassword + ssl: 0 + +ui: + host: development.arxiv.org + path: "/submit" + +database: + host: localhost + +# classic: +# database: +# uri: "mysql+mysqldb://foo:oof@localhost:3306/arXiv" diff --git a/agent/profile.yml b/agent/profile.yml new file mode 100644 index 0000000..9011882 --- /dev/null +++ b/agent/profile.yml @@ -0,0 +1,49 @@ +# Application profile + +application: + slug: submission-agent + subsystem: submission-moderation + service_group: backend + 
containers: + - name: worker + type: celery + resources: + memory: + min: 128Mi + max: 256Mi + cpu: + min: 100m + max: 300m + - name: agent + type: kinesis + resources: + memory: + min: 128Mi + max: 256Mi + cpu: + min: 100m + max: 300m + dependencies: + - name: database + type: maria + description: Checkpoint DB. + - name: classic + type: mysql + description: Classic submission DB. + - name: filemanager + type: http + description: File manager service. + - name: compiler + type: http + description: Compiler service. + - name: plaintext + type: http + description: Plain text extraction service. + - name: classifier + type: http + description: Classifier service. + - name: mail + type: smtp + description: arXiv SMTP server. + - type: kinesis + description: SubmissionEvents diff --git a/authorization/Dockerfile b/authorization/Dockerfile deleted file mode 100644 index f27671a..0000000 --- a/authorization/Dockerfile +++ /dev/null @@ -1,28 +0,0 @@ -# arxiv/submission-authorization -# -# This is a simple mockup for an eventual authorization service. - -FROM arxiv/base:0.7.1 - -WORKDIR /opt/arxiv - -ENV LC_ALL en_US.utf8 -ENV LANG en_US.utf8 - -# Add Python consumer and configuration. -ADD Pipfile /opt/arxiv/Pipfile -ADD Pipfile.lock /opt/arxiv/Pipfile.lock -RUN pip install pipenv -RUN pipenv install -RUN pipenv install uwsgi - -ADD . 
/opt/arxiv/ - -ENV JWT_SECRET "foo" -EXPOSE 8000 - -#CMD /bin/bash - -CMD pipenv run uwsgi --http-socket :8000 -w auth -M \ - -t 3000 --manage-script-name \ - --processes 8 --threads 1 --async 100 --ugreen \ diff --git a/authorization/Pipfile b/authorization/Pipfile deleted file mode 100644 index 2f3574a..0000000 --- a/authorization/Pipfile +++ /dev/null @@ -1,17 +0,0 @@ -[[source]] - -url = "https://pypi.python.org/simple" -verify_ssl = true -name = "pypi" - - -[packages] - -pytz = "*" -flask = "*" -pyjwt = "*" -arxiv-base = "*" - - -[dev-packages] - diff --git a/authorization/Pipfile.lock b/authorization/Pipfile.lock deleted file mode 100644 index 7e5b2ce..0000000 --- a/authorization/Pipfile.lock +++ /dev/null @@ -1,92 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "37c39cba5904625be556254609d5105f61fdd56a5c132ac7458736dc85ae2dfe" - }, - "host-environment-markers": { - "implementation_name": "cpython", - "implementation_version": "3.6.1", - "os_name": "posix", - "platform_machine": "x86_64", - "platform_python_implementation": "CPython", - "platform_release": "16.7.0", - "platform_system": "Darwin", - "platform_version": "Darwin Kernel Version 16.7.0: Thu Jun 15 17:36:27 PDT 2017; root:xnu-3789.70.16~2/RELEASE_X86_64", - "python_full_version": "3.6.1", - "python_version": "3.6", - "sys_platform": "darwin" - }, - "pipfile-spec": 6, - "requires": {}, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.python.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - "arxiv-base": { - "hashes": [ - "sha256:60840fad2c607aac0554135683e7f41aae786a070c60b0177bb0d0d42a02a73d" - ], - "version": "==0.5.1" - }, - "click": { - "hashes": [ - "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", - "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" - ], - "version": "==6.7" - }, - "flask": { - "hashes": [ - "sha256:0749df235e3ff61ac108f69ac178c9770caeaccad2509cb762ce1f65570a8856", - 
"sha256:49f44461237b69ecd901cc7ce66feea0319b9158743dd27a2899962ab214dac1" - ], - "version": "==0.12.2" - }, - "itsdangerous": { - "hashes": [ - "sha256:cbb3fcf8d3e33df861709ecaf89d9e6629cff0a217bc2848f1b41cd30d360519" - ], - "version": "==0.24" - }, - "jinja2": { - "hashes": [ - "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", - "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" - ], - "version": "==2.10" - }, - "markupsafe": { - "hashes": [ - "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665" - ], - "version": "==1.0" - }, - "pyjwt": { - "hashes": [ - "sha256:bca523ef95586d3a8a5be2da766fe6f82754acba27689c984e28e77a12174593", - "sha256:dacba5786fe3bf1a0ae8673874e29f9ac497860955c501289c63b15d3daae63a" - ], - "version": "==1.6.1" - }, - "pytz": { - "hashes": [ - "sha256:65ae0c8101309c45772196b21b74c46b2e5d11b6275c45d251b150d5da334555", - "sha256:c06425302f2cf668f1bba7a0a03f3c1d34d4ebeef2c72003da308b3947c7f749" - ], - "version": "==2018.4" - }, - "werkzeug": { - "hashes": [ - "sha256:d5da73735293558eb1651ee2fddc4d0dedcfa06538b8813a2e20011583c9e49b", - "sha256:c3fd7a7d41976d9f44db327260e263132466836cef6f91512889ed60ad26557c" - ], - "version": "==0.14.1" - } - }, - "develop": {} -} diff --git a/authorization/README.md b/authorization/README.md deleted file mode 100644 index 841a9b6..0000000 --- a/authorization/README.md +++ /dev/null @@ -1,22 +0,0 @@ -# Authorization service (demo) - -The authorization service handles subrequests from the [gateway](../gateway) -to authorize API client requests. This implementation merely mocks the -functionality of a real authorization service; for demonstration purposes only. - -## Example request lifecycle - -A typical client request might go something like this... 
- -``` -Client Gateway Auth Service - | --POST--> | | | - | w/token | --token-> | | - | | | | - | | <--JWT--- | | - | | | - | | --POST (w/JWT)--> | - | | Submission | --- // --> - | | <------OK-------- | - | <---OK--- | -``` diff --git a/authorization/auth.py b/authorization/auth.py deleted file mode 100644 index 750cb4a..0000000 --- a/authorization/auth.py +++ /dev/null @@ -1,65 +0,0 @@ -"""Toy authorizer implementation. For demo purposes only.""" - -import os -from flask import Flask, jsonify, request -import jwt -from arxiv.base import logging - -logger = logging.getLogger(__name__) - -JWT_SECRET = os.environ.get('JWT_SECRET', 'foo') -TOKENS = { - 'as392lks0kk32': { - 'scope': ['submission:write', 'submission:read'], - 'user': { - 'user_id': 1, - 'email': 'joe@bloggs.com' - }, - 'client': { - 'client_id': 5678 - } - }, - 'f0da9jso3l2m4': { - 'scope': ['submission:read'], - 'user': { - 'user_id': 2, - 'email': 'jane@doe.com' - }, - 'client': { - 'client_id': 5678 - } - } -} -NOPE = {'reason': 'Missing or malformed authorization header'} - -app = Flask('authorizer') - - -@app.route('/auth', methods=['GET']) -def authorize(): - """Authorize the request with an access token.""" - auth_header = request.headers.get('Authorization') - logger.debug('Got auth header: %s', auth_header) - if not auth_header: - logger.debug('Authorization header missing') - return jsonify(NOPE), 403, {} - try: - auth_token = auth_header.split(" ")[1] - except IndexError: - logger.debug('Authorization header malformed') - return jsonify(NOPE), 403, {} - - logger.debug('Got auth token') - claims = TOKENS.get(auth_token) - if not claims: - logger.debug('Access token not valid') - return jsonify(NOPE), 403, {} - logger.debug('Got claims: %s', str(claims)) - headers = {'Token': jwt.encode(claims, JWT_SECRET)} - logger.debug('Setting header') - return jsonify({'status': 'OK!'}), 200, headers - - -def application(env, start_response): - """WSGI application factory.""" - return app(env, 
start_response) diff --git a/authorization/authorization/__init__.py b/authorization/authorization/__init__.py deleted file mode 100644 index 9db813f..0000000 --- a/authorization/authorization/__init__.py +++ /dev/null @@ -1,22 +0,0 @@ -""" -Convience methods for request authorization. - -For demonstration purposes only. -""" - -import jwt -from flask import request, current_app, jsonify, g - -DecodeError = jwt.exceptions.DecodeError - - -def get_auth_token() -> str: - """Retrieve the Authorization header from the request.""" - return request.headers.get('Authorization') - - -def decode_authorization_token() -> dict: - """Retrieve and decode a JWT from the Authorization header.""" - secret = current_app.config.get('JWT_SECRET') - encoded = request.headers.get('Authorization') - return jwt.decode(encoded, secret, algorithms=['HS256']) diff --git a/authorization/authorization/decorators.py b/authorization/authorization/decorators.py deleted file mode 100644 index 37df3ba..0000000 --- a/authorization/authorization/decorators.py +++ /dev/null @@ -1,40 +0,0 @@ -"""Provides scope-based authorization with JWT. For demo purposes only.""" - -from functools import wraps -from flask import request, g -from werkzeug.exceptions import Unauthorized, Forbidden -from arxiv import status -from . import decode_authorization_token, DecodeError, get_auth_token - - -INVALID_TOKEN = {'reason': 'Invalid authorization token'} -INVALID_SCOPE = {'reason': 'Token not authorized for this action'} - - -def scoped(scope_required: str): - """Generate a decorator to enforce scope authorization.""" - def protector(func): - """Decorator that provides scope enforcement.""" - @wraps(func) - def wrapper(*args, **kwargs): - """Check the authorization token before executing the method.""" - # Attach the encrypted token so that we can use it in subrequests. 
- auth_data = request.environ.get('auth') - if auth_data is None: - raise Unauthorized('Missing authentication credentials') - scope = auth_data.get('scope') - user = auth_data.get('user') - client = auth_data.get('client') - token = auth_data.get('token') - - if scope is None or user is None or token is None: - raise Unauthorized('Missing authentication credentials') - - if scope_required not in scope: - raise Forbidden('Missing required scope') - g.user = user - g.client = client - g.token = token - return func(*args, **kwargs) - return wrapper - return protector diff --git a/authorization/authorization/middleware.py b/authorization/authorization/middleware.py deleted file mode 100644 index fcfa6d8..0000000 --- a/authorization/authorization/middleware.py +++ /dev/null @@ -1,44 +0,0 @@ -"""Middleware for decoding JWTs on requests. For demo purposes only.""" - -import os -from typing import Callable, Iterable, Tuple -import jwt - -from arxiv.base.middleware import BaseMiddleware - - -class AuthMiddleware(BaseMiddleware): - """ - Middleware to handle auth information on requests. - - Before the request is handled by the application, the ``Authorization`` - header is parsed for an encrypted JWT. If successfully decrypted, - information about the user and their authorization scope is attached - to the request. - - This can be accessed in the application via - ``flask.request.environ['auth']``. If Authorization header was not - included, or if the JWT could not be decrypted, then that value will be - ``None``. 
- """ - - def before(self, environ: dict, start_response: Callable) \ - -> Tuple[dict, Callable]: - """Parse the ``Authorization`` header in the response.""" - token = environ.get('HTTP_AUTHORIZATION') - jwt_secret = os.environ.get('JWT_SECRET') - environ['auth'] = None - if not token: - return environ, start_response - try: - decoded = jwt.decode(token, jwt_secret, algorithms=['HS256']) - except jwt.exceptions.DecodeError: # type: ignore - return environ, start_response - - environ['auth'] = { - 'scope': decoded.get('scope', []), - 'user': decoded.get('user'), - 'client': decoded.get('client'), - 'token': token - } - return environ, start_response diff --git a/compile/README.md b/compile/README.md deleted file mode 100644 index 5635e64..0000000 --- a/compile/README.md +++ /dev/null @@ -1,5 +0,0 @@ -# Compilation service stub - -This is a mock implementation of the compilation service, to support -testing and simulation of the submission system. Once a stable version of the -compilation service is available, it should be used directly. diff --git a/compile/schema/openapi.yaml b/compile/schema/openapi.yaml deleted file mode 100644 index 04c4878..0000000 --- a/compile/schema/openapi.yaml +++ /dev/null @@ -1,156 +0,0 @@ -openapi: "3.0.0" -info: - version: "0.1" - title: "arXiv Compilation Service" - contact: - name: "arXiv API Team" - email: nextgen@arxiv.org - license: - name: MIT -paths: - /compile: - post: - operationId: compile - summary: Request compilation of an upload package. - requestBody: - content: - application/json: - schema: - $ref: 'resources/compilationRequest.json' - responses: - '202': - description: | - The upload has been accepted for compilation. A compilation - task will have been generated, with a corresponding short-lived - task ID. That task ID can be used to check the status of the - compilation task, which will eventually redirect to the created - compilation resource if the task completes successfully. 
- headers: - Location: - description: Status endpoint for the compilation task. - schema: - type: "string" - /status/{task_id}: - get: - operationId: getCompilationStatus - description: | - Provides information about the status of the compilation task. - If the task has completed successfully, the response will be a redirect - to the compilation result. - parameters: - -in: path - name: task_id - description: Unique short-lived identifier for the compilation task. - required: true - schema: - type: string - responses: - '200': - description: | - Describes the current state of the process and other details. - content: - application/json: - schema: - $ref: 'resources/compileStatus.json' - '303': - description: | - Compilation has completed successfully, and the client should visit - the compilation status endpoint. - headers: - Location: - description: Location of the compile endpoint. - schema: - type: "string" - /compile/{compile_id}: - get: - operationId: getCompilationResult - description: | - Retrieve information about the result of the compilation process. - parameters: - -in: path - name: compile_id - description: Unique long-lived identifier for the compilation result. - required: true - schema: - type: string - responses: - '200': - description: | - Information about the compile result, including URIs for - the compile products (e.g. PDF, DVI, log output). - content: - application/json: - schema: - $ref: 'resources/compilationResult.json' - /compile/{compile_id}/pdf: - get: - operationId: getPDF - description: Retrieve the PDF output of the compilation process. - parameters: - -in: path - name: compile_id - description: Unique identifier for the compile. - required: true - schema: - type: string - responses: - '200': - content: - application/pdf: - schema: - type: string - format: binary - /compile/{compile_id}/dvi: - get: - operationId: getDVI - description: Retrieve the DVI output of the compilation process. 
- parameters: - -in: path - name: compile_id - description: Unique identifier for the compile. - required: true - schema: - type: string - responses: - '200': - content: - application/x-dvi: - schema: - type: string - format: binary - /compile/{compile_id}/ps: - get: - operationId: getPS - description: Retrieve the Postscript output of the compilation process. - parameters: - -in: path - name: compile_id - description: Unique identifier for the compile. - required: true - schema: - type: string - responses: - '200': - content: - application/postscript: - schema: - type: string - format: binary - /compile/{compile_id}/log: - get: - operationId: getLog - description: Retrieve the log output of the compilation process. - parameters: - -in: path - name: compile_id - description: Unique identifier for the compile. - required: true - schema: - type: string - responses: - '200': - content: - text/plain: - schema: - type: string - format: binary diff --git a/compile/schema/resources/compilationResult.json b/compile/schema/resources/compilationResult.json deleted file mode 100644 index 980687c..0000000 --- a/compile/schema/resources/compilationResult.json +++ /dev/null @@ -1,112 +0,0 @@ -{ - "title": "CompilationResult", - "description": "Describes the result of a source compilation. 
Depending on the compilation parameters, a PDF, DVI, and/or PostScript may be available.", - "additionalProperties": false, - "required": ["compile_id", "status", "log", "start_time", "completion_time"], - "type": "object", - "properties": { - "compile_id": { - "description": "Unique long-lived identifier for the compilation result.", - "type": "integer" - }, - "status": { - "description": "Final status of the compilation job.", - "type": "string", - "enum": ["FAILED", "SUCCEEDED", "CANCELLED"] - }, - "completion_time": { - "description": "The date-time when the job finished.", - "type": "string", - "format": "datetime" - }, - "start_time": { - "description": "The date-time when the job started.", - "type": "string", - "format": "datetime" - }, - "log": { - "description": "The compilation log.", - "readOnly": true, - "type": "object", - "required": "href", - "parameters": { - "href": { - "type": "string", - "format": "url", - "description": "Location of the compilation log." - }, - "format": { - "type": "string", - "description": "The mime-type of the log file." - }, - "checksum": { - "type": "string", - "description": "The MD5 hash of the log file." - } - } - }, - "pdf": { - "description": "The compiled PDF.", - "readOnly": true, - "type": "object", - "required": "href", - "parameters": { - "href": { - "type": "string", - "format": "url", - "description": "Location of the compiled PDF." - }, - "format": { - "type": "string", - "description": "The mime-type of the compiled PDF." - }, - "checksum": { - "type": "string", - "description": "The MD5 hash of the compiled PDF." - } - } - }, - "dvi": { - "description": "The compiled DVI.", - "readOnly": true, - "type": "object", - "required": "href", - "parameters": { - "href": { - "type": "string", - "format": "url", - "description": "Location of the compiled DVI." - }, - "format": { - "type": "string", - "description": "The mime-type of the compiled DVI." 
- }, - "checksum": { - "type": "string", - "description": "The MD5 hash of the compiled DVI." - } - } - }, - "ps": { - "description": "The compiled PostScript file.", - "readOnly": true, - "type": "object", - "required": "href", - "parameters": { - "href": { - "type": "string", - "format": "url", - "description": "Location of the compiled postscript." - }, - "format": { - "type": "string", - "description": "The mime-type of the compiled postscript." - }, - "checksum": { - "type": "string", - "description": "The MD5 hash of the compiled postscript." - } - } - } - } -} diff --git a/compile/schema/resources/compilationStatus.json b/compile/schema/resources/compilationStatus.json deleted file mode 100644 index d8795c2..0000000 --- a/compile/schema/resources/compilationStatus.json +++ /dev/null @@ -1,23 +0,0 @@ -{ - "title": "CompilationStatus", - "description": "Describes the current status of a source compilation.", - "additionalProperties": false, - "required": ["compile_id", "status", "start_time"], - "type": "object", - "properties": { - "task_id": { - "description": "Short-lived identifier for the compilation task.", - "type": "string" - }, - "status": { - "description": "Current status of the compilation job.", - "type": "string", - "enum": ["FAILED", "SUCCEEDED", "CANCELLED", "PENDING", "IN_PROGRESS"] - }, - "start_time": { - "description": "The date-time when the job started.", - "type": "string", - "format": "datetime" - } - } -} diff --git a/core/MANIFEST.in b/core/MANIFEST.in new file mode 100644 index 0000000..c90f7f1 --- /dev/null +++ b/core/MANIFEST.in @@ -0,0 +1 @@ +recursive-include arxiv/submission/templates * diff --git a/core/Pipfile b/core/Pipfile index 766e5b6..1453a46 100644 --- a/core/Pipfile +++ b/core/Pipfile @@ -1,48 +1,29 @@ [[source]] - url = "https://pypi.python.org/simple" verify_ssl = true name = "pypi" - [packages] - -"boto3" = "==1.5.8" -botocore = "==1.8.22" -certifi = "==2017.11.5" -chardet = "==3.0.4" -click = "==6.7" -coverage = 
"==4.4.2" -docutils = "==0.14" -flask = "*" -flask-sqlalchemy = "*" -idna = "==2.6" -itsdangerous = "==0.24" -"jinja2" = "==2.10" -jmespath = "==0.9.3" +flask = "==1.0.2" +dataclasses = "==0.6" +pyjwt = "==1.6.4" jsonschema = "==2.6.0" -markupsafe = "==1.0" -mypy = "==0.550" -nose = "==1.3.7" -"nose2" = "==0.7.2" -psutil = "==5.4.1" -"psycopg2" = "==2.7.3.2" -pyjwt = "==1.5.3" -pyld = "==0.8.2" -python-dateutil = "==2.6.1" -requests = "==2.18.4" -"s3transfer" = "==0.1.12" -sqlalchemy = "==1.1.15" -typed-ast = "==1.1.0" -"urllib3" = "==1.22" -uwsgi = "==2.0.15" -werkzeug = "==0.12.2" -openapi-spec-validator = "==0.1.2" -arxiv-base = "==0.7.1" -dataclasses = "*" -pytz = "*" - +arxiv-base = "==0.15.3rc1" +arxiv-auth = "==0.3.2rc6" +sqlalchemy = ">=1.3.0" +pytz = "==2018.7" +uwsgi = "==2.0.17.1" +mysqlclient = "==1.3.13" +mimesis = "==2.1.0" +bleach = ">=3.0.2" +python-dateutil = "*" +unidecode = "*" +mypy_extensions = "*" +urllib3 = ">=1.24.2" +Jinja2 = ">=2.10.1" +pyyaml = ">=4.2b1" +"e1839a8" = {path = "."} [dev-packages] - "nose2" = "*" +mimesis = "*" diff --git a/core/Pipfile.lock b/core/Pipfile.lock deleted file mode 100644 index c58204b..0000000 --- a/core/Pipfile.lock +++ /dev/null @@ -1,450 +0,0 @@ -{ - "_meta": { - "hash": { - "sha256": "1c540b0741ab5a7d02daedc151c60c70149ca47f801f67fbbd1c52482abe4640" - }, - "host-environment-markers": { - "implementation_name": "cpython", - "implementation_version": "3.6.1", - "os_name": "posix", - "platform_machine": "x86_64", - "platform_python_implementation": "CPython", - "platform_release": "16.7.0", - "platform_system": "Darwin", - "platform_version": "Darwin Kernel Version 16.7.0: Thu Jun 15 17:36:27 PDT 2017; root:xnu-3789.70.16~2/RELEASE_X86_64", - "python_full_version": "3.6.1", - "python_version": "3.6", - "sys_platform": "darwin" - }, - "pipfile-spec": 6, - "requires": {}, - "sources": [ - { - "name": "pypi", - "url": "https://pypi.python.org/simple", - "verify_ssl": true - } - ] - }, - "default": { - 
"arxiv-base": { - "hashes": [ - "sha256:60840fad2c607aac0554135683e7f41aae786a070c60b0177bb0d0d42a02a73d" - ], - "version": "==0.5.1" - }, - "boto3": { - "hashes": [ - "sha256:df129fdde1e24b20e565f27be1235cab4bde9364f1fa6775744aa9b377a763c8", - "sha256:18bc8affbb575db705a6262cc09a9a1a003a2f3c5cc1c64c9d55e637cd68a870" - ], - "version": "==1.5.8" - }, - "botocore": { - "hashes": [ - "sha256:8763f1d1ab9b56e2cde6d63af92e8e7df942c24921888e3d25476c98e98e0e05", - "sha256:40ab4b36df9c33a7c8a715b5d36d2c41e1798a66068394a579e376fbb4121ce3" - ], - "version": "==1.8.22" - }, - "certifi": { - "hashes": [ - "sha256:244be0d93b71e93fc0a0a479862051414d0e00e16435707e5bf5000f92e04694", - "sha256:5ec74291ca1136b40f0379e1128ff80e866597e4e2c1e755739a913bbc3613c0" - ], - "version": "==2017.11.5" - }, - "chardet": { - "hashes": [ - "sha256:fc323ffcaeaed0e0a02bf4d117757b98aed530d9ed4531e3e15460124c106691", - "sha256:84ab92ed1c4d4f16916e05906b6b75a6c0fb5db821cc65e70cbd64a3e2a5eaae" - ], - "version": "==3.0.4" - }, - "click": { - "hashes": [ - "sha256:29f99fc6125fbc931b758dc053b3114e55c77a6e4c6c3a2674a2dc986016381d", - "sha256:f15516df478d5a56180fbf80e68f206010e6d160fc39fa508b65e035fd75130b" - ], - "version": "==6.7" - }, - "coverage": { - "hashes": [ - "sha256:d1ee76f560c3c3e8faada866a07a32485445e16ed2206ac8378bd90dadffb9f0", - "sha256:007eeef7e23f9473622f7d94a3e029a45d55a92a1f083f0f3512f5ab9a669b05", - "sha256:17307429935f96c986a1b1674f78079528833410750321d22b5fb35d1883828e", - "sha256:845fddf89dca1e94abe168760a38271abfc2e31863fbb4ada7f9a99337d7c3dc", - "sha256:3f4d0b3403d3e110d2588c275540649b1841725f5a11a7162620224155d00ba2", - "sha256:4c4f368ffe1c2e7602359c2c50233269f3abe1c48ca6b288dcd0fb1d1c679733", - "sha256:f8c55dd0f56d3d618dfacf129e010cbe5d5f94b6951c1b2f13ab1a2f79c284da", - "sha256:cdd92dd9471e624cd1d8c1a2703d25f114b59b736b0f1f659a98414e535ffb3d", - "sha256:2ad357d12971e77360034c1596011a03f50c0f9e1ecd12e081342b8d1aee2236", - 
"sha256:e9a0e1caed2a52f15c96507ab78a48f346c05681a49c5b003172f8073da6aa6b", - "sha256:eea9135432428d3ca7ee9be86af27cb8e56243f73764a9b6c3e0bda1394916be", - "sha256:700d7579995044dc724847560b78ac786f0ca292867447afda7727a6fbaa082e", - "sha256:66f393e10dd866be267deb3feca39babba08ae13763e0fc7a1063cbe1f8e49f6", - "sha256:5ff16548492e8a12e65ff3d55857ccd818584ed587a6c2898a9ebbe09a880674", - "sha256:d00e29b78ff610d300b2c37049a41234d48ea4f2d2581759ebcf67caaf731c31", - "sha256:87d942863fe74b1c3be83a045996addf1639218c2cb89c5da18c06c0fe3917ea", - "sha256:358d635b1fc22a425444d52f26287ae5aea9e96e254ff3c59c407426f44574f4", - "sha256:81912cfe276e0069dca99e1e4e6be7b06b5fc8342641c6b472cb2fed7de7ae18", - "sha256:079248312838c4c8f3494934ab7382a42d42d5f365f0cf7516f938dbb3f53f3f", - "sha256:b0059630ca5c6b297690a6bf57bf2fdac1395c24b7935fd73ee64190276b743b", - "sha256:493082f104b5ca920e97a485913de254cbe351900deed72d4264571c73464cd0", - "sha256:e3ba9b14607c23623cf38f90b23f5bed4a3be87cbfa96e2e9f4eabb975d1e98b", - "sha256:82cbd3317320aa63c65555aa4894bf33a13fb3a77f079059eb5935eea415938d", - "sha256:9721f1b7275d3112dc7ccf63f0553c769f09b5c25a26ee45872c7f5c09edf6c1", - "sha256:bd4800e32b4c8d99c3a2c943f1ac430cbf80658d884123d19639bcde90dad44a", - "sha256:f29841e865590af72c4b90d7b5b8e93fd560f5dea436c1d5ee8053788f9285de", - "sha256:f3a5c6d054c531536a83521c00e5d4004f1e126e2e2556ce399bef4180fbe540", - "sha256:dd707a21332615108b736ef0b8513d3edaf12d2a7d5fc26cd04a169a8ae9b526", - "sha256:2e1a5c6adebb93c3b175103c2f855eda957283c10cf937d791d81bef8872d6ca", - "sha256:f87f522bde5540d8a4b11df80058281ac38c44b13ce29ced1e294963dd51a8f8", - "sha256:a7cfaebd8f24c2b537fa6a271229b051cdac9c1734bb6f939ccfc7c055689baa", - "sha256:309d91bd7a35063ec7a0e4d75645488bfab3f0b66373e7722f23da7f5b0f34cc", - "sha256:0388c12539372bb92d6dde68b4627f0300d948965bbb7fc104924d715fdc0965", - "sha256:ab3508df9a92c1d3362343d235420d08e2662969b83134f8a97dc1451cbe5e84", - "sha256:43a155eb76025c61fc20c3d03b89ca28efa6f5be572ab6110b2fb68eda96bfea", 
- "sha256:f98b461cb59f117887aa634a66022c0bd394278245ed51189f63a036516e32de", - "sha256:b6cebae1502ce5b87d7c6f532fa90ab345cfbda62b95aeea4e431e164d498a3d", - "sha256:a4497faa4f1c0fc365ba05eaecfb6b5d24e3c8c72e95938f9524e29dadb15e76", - "sha256:2b4d7f03a8a6632598cbc5df15bbca9f778c43db7cf1a838f4fa2c8599a8691a", - "sha256:1afccd7e27cac1b9617be8c769f6d8a6d363699c9b86820f40c74cfb3328921c" - ], - "version": "==4.4.2" - }, - "dataclasses": { - "hashes": [ - "sha256:7127de1976d5c6d4f374c9826aefe6564b612690a60ca957cbd5c64483326dc1", - "sha256:3f20420c953ecf4c4df68f3aaee48dea69a87935aaeceffce1176b5366b08f1e" - ], - "version": "==0.5" - }, - "docutils": { - "hashes": [ - "sha256:7a4bd47eaf6596e1295ecb11361139febe29b084a87bf005bf899f9a42edc3c6", - "sha256:02aec4bd92ab067f6ff27a38a38a41173bf01bed8f89157768c1573f53e474a6", - "sha256:51e64ef2ebfb29cae1faa133b3710143496eca21c530f3f71424d77687764274" - ], - "version": "==0.14" - }, - "flask": { - "hashes": [ - "sha256:0749df235e3ff61ac108f69ac178c9770caeaccad2509cb762ce1f65570a8856", - "sha256:49f44461237b69ecd901cc7ce66feea0319b9158743dd27a2899962ab214dac1" - ], - "version": "==0.12.2" - }, - "flask-sqlalchemy": { - "hashes": [ - "sha256:3bc0fac969dd8c0ace01b32060f0c729565293302f0c4269beed154b46bec50b", - "sha256:5971b9852b5888655f11db634e87725a9031e170f37c0ce7851cf83497f56e53" - ], - "version": "==2.3.2" - }, - "idna": { - "hashes": [ - "sha256:8c7309c718f94b3a625cb648ace320157ad16ff131ae0af362c9f21b80ef6ec4", - "sha256:2c6a5de3089009e3da7c5dde64a141dbc8551d5b7f6cf4ed7c2568d0cc520a8f" - ], - "version": "==2.6" - }, - "itsdangerous": { - "hashes": [ - "sha256:cbb3fcf8d3e33df861709ecaf89d9e6629cff0a217bc2848f1b41cd30d360519" - ], - "version": "==0.24" - }, - "jinja2": { - "hashes": [ - "sha256:74c935a1b8bb9a3947c50a54766a969d4846290e1e788ea44c1392163723c3bd", - "sha256:f84be1bb0040caca4cea721fcbbbbd61f9be9464ca236387158b0feea01914a4" - ], - "version": "==2.10" - }, - "jmespath": { - "hashes": [ - 
"sha256:f11b4461f425740a1d908e9a3f7365c3d2e569f6ca68a2ff8bc5bcd9676edd63", - "sha256:6a81d4c9aa62caf061cb517b4d9ad1dd300374cd4706997aff9cd6aedd61fc64" - ], - "version": "==0.9.3" - }, - "jsonschema": { - "hashes": [ - "sha256:000e68abd33c972a5248544925a0cae7d1125f9bf6c58280d37546b946769a08", - "sha256:6ff5f3180870836cae40f06fa10419f557208175f13ad7bc26caa77beb1f6e02" - ], - "version": "==2.6.0" - }, - "markupsafe": { - "hashes": [ - "sha256:a6be69091dac236ea9c6bc7d012beab42010fa914c459791d627dad4910eb665" - ], - "version": "==1.0" - }, - "mypy": { - "hashes": [ - "sha256:0d66ca31ec6a51e465f68634e9daf363b877f0caa19a7f1d6319a743fb709d25", - "sha256:58302374890b9803b19a5547e2229f8bab46900624a2f31f398b231e5f461929" - ], - "version": "==0.550" - }, - "nose": { - "hashes": [ - "sha256:dadcddc0aefbf99eea214e0f1232b94f2fa9bd98fa8353711dacb112bfcbbb2a", - "sha256:9ff7c6cc443f8c51994b34a667bbcf45afd6d945be7477b52e97516fd17c53ac", - "sha256:f1bffef9cbc82628f6e7d7b40d7e255aefaa1adb6a1b1d26c69a8b79e6208a98" - ], - "version": "==1.3.7" - }, - "nose2": { - "hashes": [ - "sha256:b7d09629dfd616ffc0ca85f82ee53318db28affe2dbc39d2f1fb1c981ed3ec42" - ], - "version": "==0.7.2" - }, - "openapi-spec-validator": { - "hashes": [ - "sha256:c0efe630544e3a2d9ba2109e301d176f92255d7d3864a47fc56305ef058cfc6b", - "sha256:6f54ef5e1d24416b18aa858fa9c18fd50ff3a40f48cf5ceb18c2a0928c3d9e5f" - ], - "version": "==0.1.2" - }, - "psutil": { - "hashes": [ - "sha256:7ef26ebe728ac821de17df23820e6ffcfd37c409fc865380e4d5ae1388f274a1", - "sha256:692dc72817d157aae522231dd334ea2524c6b07d844db0e7a2d6897820083427", - "sha256:92342777d46e4630cf17d437412dc7fce0a8561217e074d36a35eb911ffd570e", - "sha256:f8f2f47a987c32ed3ca2068f3dfa9060dc9ff6cbed023d627d3f27060f4e59c4", - "sha256:1fce45549618d1930afefe322834ba91758331725bfdaec73ba6abcc83f6dc11", - "sha256:f8a88553b2b5916f3bd814a91942215822a1dabae6db033cbb019095d6a24bc2", - "sha256:4139f76baa59142b907dd581d7ff3506a5163cb8ef69e8e92060df330bbf5788", - 
"sha256:d61bc04401ce938576e4c6ec201e812ed4114bfb9712202b87003619116c90c6", - "sha256:42e2de159e3c987435cb3b47d6f37035db190a1499f3af714ba7af5c379b6ba2" - ], - "version": "==5.4.1" - }, - "psycopg2": { - "hashes": [ - "sha256:594aa9a095de16614f703d759e10c018bdffeafce2921b8e80a0e8a0ebbc12e5", - "sha256:1cf5d84290c771eeecb734abe2c6c3120e9837eb12f99474141a862b9061ac51", - "sha256:0344b181e1aea37a58c218ccb0f0f771295de9aa25a625ed076e6996c6530f9e", - "sha256:25250867a4cd1510fb755ef9cb38da3065def999d8e92c44e49a39b9b76bc893", - "sha256:317612d5d0ca4a9f7e42afb2add69b10be360784d21ce4ecfbca19f1f5eadf43", - "sha256:9d6266348b15b4a48623bf4d3e50445d8e581da413644f365805b321703d0fac", - "sha256:ddca39cc55877653b5fcf59976d073e3d58c7c406ef54ae8e61ddf8782867182", - "sha256:988d2ec7560d42ef0ac34b3b97aad14c4f068792f00e1524fa1d3749fe4e4b64", - "sha256:7a9c6c62e6e05df5406e9b5235c31c376a22620ef26715a663cee57083b3c2ea", - "sha256:7a75565181e75ba0b9fb174b58172bf6ea9b4331631cfe7bafff03f3641f5d73", - "sha256:94e4128ba1ea56f02522fffac65520091a9de3f5c00da31539e085e13db4771b", - "sha256:92179bd68c2efe72924a99b6745a9172471931fc296f9bfdf9645b75eebd6344", - "sha256:b9358e203168fef7bfe9f430afaed3a2a624717a1d19c7afa7dfcbd76e3cd95c", - "sha256:009e0bc09a57dbef4b601cb8b46a2abad51f5274c8be4bba276ff2884cd4cc53", - "sha256:d3ac07240e2304181ffdb13c099840b5eb555efc7be9344503c0c03aa681de79", - "sha256:40fa5630cd7d237cd93c4d4b64b9e5ed9273d1cfce55241c7f9066f5db70629d", - "sha256:6c2f1a76a9ebd9ecf7825b9e20860139ca502c2bf1beabf6accf6c9e66a7e0c3", - "sha256:37f54452c7787dbdc0a634ca9773362b91709917f0b365ed14b831f03cbd34ba", - "sha256:8f5942a4daf1ffac42109dc4a72f786af4baa4fa702ede1d7c57b4b696c2e7d6", - "sha256:bf708455cd1e9fa96c05126e89a0c59b200d086c7df7bbafc7d9be769e4149a3", - "sha256:82c40ea3ac1555e0462803380609fbe8b26f52620f3d4f8eb480cfd8ceed8a14", - "sha256:207ba4f9125a0a4200691e82d5eee7ea1485708eabe99a07fc7f08696fae62f4", - "sha256:0cd4c848f0e9d805d531e44973c8f48962e20eb7fc0edac3db4f9dbf9ed5ab82", - 
"sha256:57baf63aeb2965ca4b52613ce78e968b6d2bde700c97f6a7e8c6c236b51ab83e", - "sha256:2954557393cfc9a5c11a5199c7a78cd9c0c793a047552d27b1636da50d013916", - "sha256:7c31dade89634807196a6b20ced831fbd5bec8a21c4e458ea950c9102c3aa96f", - "sha256:1286dd16d0e46d59fa54582725986704a7a3f3d9aca6c5902a7eceb10c60cb7e", - "sha256:697ff63bc5451e0b0db48ad205151123d25683b3754198be7ab5fcb44334e519", - "sha256:fc993c9331d91766d54757bbc70231e29d5ceb2d1ac08b1570feaa0c38ab9582", - "sha256:9d64fed2681552ed642e9c0cc831a9e95ab91de72b47d0cb68b5bf506ba88647", - "sha256:5c3213be557d0468f9df8fe2487eaf2990d9799202c5ff5cb8d394d09fad9b2a" - ], - "version": "==2.7.3.2" - }, - "pyaml": { - "hashes": [ - "sha256:f83fc302c52c6b83a15345792693ae0b5bc07ad19f59e318b7617d7123d62990", - "sha256:66623c52f34d83a2c0fc963e08e8b9d0c13d88404e3b43b1852ef71eda19afa3" - ], - "version": "==17.12.1" - }, - "pyjwt": { - "hashes": [ - "sha256:a4e5f1441e3ca7b382fd0c0b416777ced1f97c64ef0c33bfa39daf38505cfd2f", - "sha256:500be75b17a63f70072416843dc80c8821109030be824f4d14758f114978bae7" - ], - "version": "==1.5.3" - }, - "pyld": { - "hashes": [ - "sha256:f11b8586d2d2bc310739a9c49018574d01b8adc4533e950f64c85a13909d7630" - ], - "version": "==0.8.2" - }, - "python-dateutil": { - "hashes": [ - "sha256:95511bae634d69bc7329ba55e646499a842bc4ec342ad54a8cdb65645a0aad3c", - "sha256:891c38b2a02f5bb1be3e4793866c8df49c7d19baabf9c1bad62547e0b4866aca" - ], - "version": "==2.6.1" - }, - "pytz": { - "hashes": [ - "sha256:ed6509d9af298b7995d69a440e2822288f2eca1681b8cce37673dbb10091e5fe", - "sha256:f93ddcdd6342f94cea379c73cddb5724e0d6d0a1c91c9bdef364dc0368ba4fda", - "sha256:61242a9abc626379574a166dc0e96a66cd7c3b27fc10868003fa210be4bff1c9", - "sha256:ba18e6a243b3625513d85239b3e49055a2f0318466e0b8a92b8fb8ca7ccdf55f", - "sha256:07edfc3d4d2705a20a6e99d97f0c4b61c800b8232dc1c04d87e8554f130148dd", - "sha256:3a47ff71597f821cd84a162e71593004286e5be07a340fd462f0d33a760782b5", - "sha256:5bd55c744e6feaa4d599a6cbd8228b4f8f9ba96de2c38d56f08e534b3c9edf0d", 
- "sha256:887ab5e5b32e4d0c86efddd3d055c1f363cbaa583beb8da5e22d2fa2f64d51ef", - "sha256:410bcd1d6409026fbaa65d9ed33bf6dd8b1e94a499e32168acfc7b332e4095c0" - ], - "version": "==2018.3" - }, - "pyyaml": { - "hashes": [ - "sha256:3262c96a1ca437e7e4763e2843746588a965426550f3797a79fca9c6199c431f", - "sha256:16b20e970597e051997d90dc2cddc713a2876c47e3d92d59ee198700c5427736", - "sha256:e863072cdf4c72eebf179342c94e6989c67185842d9997960b3e69290b2fa269", - "sha256:bc6bced57f826ca7cb5125a10b23fd0f2fff3b7c4701d64c439a300ce665fff8", - "sha256:c01b880ec30b5a6e6aa67b09a2fe3fb30473008c85cd6a67359a1b15ed6d83a4", - "sha256:827dc04b8fa7d07c44de11fabbc888e627fa8293b695e0f99cb544fdfa1bf0d1", - "sha256:592766c6303207a20efc445587778322d7f73b161bd994f227adaa341ba212ab", - "sha256:5f84523c076ad14ff5e6c037fe1c89a7f73a3e04cf0377cb4d017014976433f3", - "sha256:0c507b7f74b3d2dd4d1322ec8a94794927305ab4cebbe89cc47fe5e81541e6e8", - "sha256:b4c423ab23291d3945ac61346feeb9a0dc4184999ede5e7c43e1ffb975130ae6", - "sha256:ca233c64c6e40eaa6c66ef97058cdc80e8d0157a443655baa1b2966e812807ca", - "sha256:4474f8ea030b5127225b8894d626bb66c01cda098d47a2b0d3429b6700af9fd8", - "sha256:326420cbb492172dec84b0f65c80942de6cedb5233c413dd824483989c000608", - "sha256:5ac82e411044fb129bae5cfbeb3ba626acb2af31a8d17d175004b70862a741a7" - ], - "version": "==3.12" - }, - "requests": { - "hashes": [ - "sha256:6a1b267aa90cac58ac3a765d067950e7dbbf75b1da07e895d1f594193a40a38b", - "sha256:9c443e7324ba5b85070c4a818ade28bfabedf16ea10206da1132edaa6dda237e" - ], - "version": "==2.18.4" - }, - "s3transfer": { - "hashes": [ - "sha256:23c156ca4d64b022476c92c44bf938bef71af9ce0dcd8fd6585e7bce52f66e47", - "sha256:10891b246296e0049071d56c32953af05cea614dca425a601e4c0be35990121e" - ], - "version": "==0.1.12" - }, - "six": { - "hashes": [ - "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", - "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" - ], - "version": "==1.11.0" - }, - "sqlalchemy": { - 
"hashes": [ - "sha256:8b79a5ed91cdcb5abe97b0045664c55c140aec09e5dd5c01303e23de5fe7a95a" - ], - "version": "==1.1.15" - }, - "typed-ast": { - "hashes": [ - "sha256:0948004fa228ae071054f5208840a1e88747a357ec1101c17217bfe99b299d58", - "sha256:25d8feefe27eb0303b73545416b13d108c6067b846b543738a25ff304824ed9a", - "sha256:c05b41bc1deade9f90ddc5d988fe506208019ebba9f2578c622516fd201f5863", - "sha256:519425deca5c2b2bdac49f77b2c5625781abbaf9a809d727d3a5596b30bb4ded", - "sha256:6de012d2b166fe7a4cdf505eee3aaa12192f7ba365beeefaca4ec10e31241a85", - "sha256:79b91ebe5a28d349b6d0d323023350133e927b4de5b651a8aa2db69c761420c6", - "sha256:a8034021801bc0440f2e027c354b4eafd95891b573e12ff0418dec385c76785c", - "sha256:f19f2a4f547505fe9072e15f6f4ae714af51b5a681a97f187971f50c283193b6", - "sha256:c9b060bd1e5a26ab6e8267fd46fc9e02b54eb15fffb16d112d4c7b1c12987559", - "sha256:2e214b72168ea0275efd6c884b114ab42e316de3ffa125b267e732ed2abda892", - "sha256:bc978ac17468fe868ee589c795d06777f75496b1ed576d308002c8a5756fb9ea", - "sha256:edb04bdd45bfd76c8292c4d9654568efaedf76fe78eb246dde69bdb13b2dad87", - "sha256:668d0cec391d9aed1c6a388b0d5b97cd22e6073eaa5fbaa6d2946603b4871efe", - "sha256:29464a177d56e4e055b5f7b629935af7f49c196be47528cc94e0a7bf83fbc2b9", - "sha256:8550177fa5d4c1f09b5e5f524411c44633c80ec69b24e0e98906dd761941ca46", - "sha256:3e0d5e48e3a23e9a4d1a9f698e32a542a4a288c871d33ed8df1b092a40f3a0f9", - "sha256:68ba70684990f59497680ff90d18e756a47bf4863c604098f10de9716b2c0bdd", - "sha256:57fe287f0cdd9ceaf69e7b71a2e94a24b5d268b35df251a88fef5cc241bf73aa" - ], - "version": "==1.1.0" - }, - "urllib3": { - "hashes": [ - "sha256:06330f386d6e4b195fbfc736b297f58c5a892e4440e54d294d7004e3a9bbea1b", - "sha256:cc44da8e1145637334317feebd728bd869a35285b93cbb4cca2577da7e62db4f" - ], - "version": "==1.22" - }, - "uwsgi": { - "hashes": [ - "sha256:572ef9696b97595b4f44f6198fe8c06e6f4e6351d930d22e5330b071391272ff" - ], - "version": "==2.0.15" - }, - "werkzeug": { - "hashes": [ - 
"sha256:e8549c143af3ce6559699a01e26fa4174f4c591dbee0a499f3cd4c3781cdec3d", - "sha256:903a7b87b74635244548b30d30db4c8947fe64c5198f58899ddcd3a13c23bb26" - ], - "version": "==0.12.2" - } - }, - "develop": { - "coverage": { - "hashes": [ - "sha256:7608a3dd5d73cb06c531b8925e0ef8d3de31fed2544a7de6c63960a1e73ea4bc", - "sha256:3a2184c6d797a125dca8367878d3b9a178b6fdd05fdc2d35d758c3006a1cd694", - "sha256:f3f501f345f24383c0000395b26b726e46758b71393267aeae0bd36f8b3ade80", - "sha256:0b136648de27201056c1869a6c0d4e23f464750fd9a9ba9750b8336a244429ed", - "sha256:337ded681dd2ef9ca04ef5d93cfc87e52e09db2594c296b4a0a3662cb1b41249", - "sha256:3eb42bf89a6be7deb64116dd1cc4b08171734d721e7a7e57ad64cc4ef29ed2f1", - "sha256:be6cfcd8053d13f5f5eeb284aa8a814220c3da1b0078fa859011c7fffd86dab9", - "sha256:69bf008a06b76619d3c3f3b1983f5145c75a305a0fea513aca094cae5c40a8f5", - "sha256:2eb564bbf7816a9d68dd3369a510be3327f1c618d2357fa6b1216994c2e3d508", - "sha256:9d6dd10d49e01571bf6e147d3b505141ffc093a06756c60b053a859cb2128b1f", - "sha256:701cd6093d63e6b8ad7009d8a92425428bc4d6e7ab8d75efbb665c806c1d79ba", - "sha256:5a13ea7911ff5e1796b6d5e4fbbf6952381a611209b736d48e675c2756f3f74e", - "sha256:c1bb572fab8208c400adaf06a8133ac0712179a334c09224fb11393e920abcdd", - "sha256:03481e81d558d30d230bc12999e3edffe392d244349a90f4ef9b88425fac74ba", - "sha256:28b2191e7283f4f3568962e373b47ef7f0392993bb6660d079c62bd50fe9d162", - "sha256:de4418dadaa1c01d497e539210cb6baa015965526ff5afc078c57ca69160108d", - "sha256:8c3cb8c35ec4d9506979b4cf90ee9918bc2e49f84189d9bf5c36c0c1119c6558", - "sha256:7e1fe19bd6dce69d9fd159d8e4a80a8f52101380d5d3a4d374b6d3eae0e5de9c", - "sha256:6bc583dc18d5979dc0f6cec26a8603129de0304d5ae1f17e57a12834e7235062", - "sha256:198626739a79b09fa0a2f06e083ffd12eb55449b5f8bfdbeed1df4910b2ca640", - "sha256:7aa36d2b844a3e4a4b356708d79fd2c260281a7390d678a10b91ca595ddc9e99", - "sha256:3d72c20bd105022d29b14a7d628462ebdc61de2f303322c0212a054352f3b287", - 
"sha256:4635a184d0bbe537aa185a34193898eee409332a8ccb27eea36f262566585000", - "sha256:e05cb4d9aad6233d67e0541caa7e511fa4047ed7750ec2510d466e806e0255d6", - "sha256:76ecd006d1d8f739430ec50cc872889af1f9c1b6b8f48e29941814b09b0fd3cc", - "sha256:7d3f553904b0c5c016d1dad058a7554c7ac4c91a789fca496e7d8347ad040653", - "sha256:3c79a6f7b95751cdebcd9037e4d06f8d5a9b60e4ed0cd231342aa8ad7124882a", - "sha256:56e448f051a201c5ebbaa86a5efd0ca90d327204d8b059ab25ad0f35fbfd79f1", - "sha256:ac4fef68da01116a5c117eba4dd46f2e06847a497de5ed1d64bb99a5fda1ef91", - "sha256:1c383d2ef13ade2acc636556fd544dba6e14fa30755f26812f54300e401f98f2", - "sha256:b8815995e050764c8610dbc82641807d196927c3dbed207f0a079833ffcf588d", - "sha256:104ab3934abaf5be871a583541e8829d6c19ce7bde2923b2751e0d3ca44db60a", - "sha256:9e112fcbe0148a6fa4f0a02e8d58e94470fc6cb82a5481618fea901699bf34c4", - "sha256:15b111b6a0f46ee1a485414a52a7ad1d703bdf984e9ed3c288a4414d3871dcbd", - "sha256:e4d96c07229f58cb686120f168276e434660e4358cc9cf3b0464210b04913e77", - "sha256:f8a923a85cb099422ad5a2e345fe877bbc89a8a8b23235824a93488150e45f6e" - ], - "version": "==4.5.1" - }, - "nose2": { - "hashes": [ - "sha256:954a62cfb2d2ac06dad32995cbc822bf00cc11e20d543963515932fd4eff33fa" - ], - "version": "==0.7.4" - }, - "six": { - "hashes": [ - "sha256:832dc0e10feb1aa2c68dcc57dbb658f1c7e65b9b61af69048abc87a2db00a0eb", - "sha256:70e8a77beed4562e7f14fe23a786b54f6296e34344c23bc42f07b15018ff98e9" - ], - "version": "==1.11.0" - } - } -} diff --git a/core/arxiv/submission/__init__.py b/core/arxiv/submission/__init__.py new file mode 100644 index 0000000..8113c7d --- /dev/null +++ b/core/arxiv/submission/__init__.py @@ -0,0 +1,250 @@ +""" +Core event-centric data abstraction for the submission & moderation subsystem. + +This package provides an event-based API for mutating submissions. Instead of +representing submissions as objects and mutating them directly in web +controllers and other places, we represent a submission as a stream of commands +or events. 
This ensures that we have a precise and complete record of +activities concerning submissions, and provides an explicit and consistent +definition of operations that can be performed within the arXiv submission +system. + +Overview +======== + +Event types are defined in :mod:`.domain.event`. The base class for all events +is :class:`.domain.event.base.Event`. Each event type defines additional +required data, and has ``validate`` and ``project`` methods that implement its +logic. Events operate on :class:`.domain.submission.Submission` instances. + +.. code-block:: python + + from arxiv.submission import CreateSubmission, User, Submission + user = User(1345, 'foo@user.com') + creation = CreateSubmission(creator=user) + + +:mod:`.core` defines the persistence API for submission data. +:func:`.core.save` is used to commit new events. :func:`.core.load` retrieves +events for a submission and plays them forward to get the current state, +whereas :func:`.core.load_fast` retrieves the latest projected state of the +submission (faster, theoretically less reliable). + +.. code-block:: python + + from arxiv.submission import save, SetTitle + submission, events = save(creation, SetTitle(creator=user, title='Title!')) + + +Watch out for :class:`.exceptions.InvalidEvent` to catch validation-related +problems (e.g. bad data, submission in wrong state). Watch for +:class:`.SaveError` to catch problems with persisting events. + +Callbacks can be attached to event types in order to execute routines +automatically when specific events are committed, using +:func:`.domain.Event.bind`. + +.. code-block:: python + + from typing import Iterable + + @SetTitle.bind() + def flip_title(event: SetTitle, before: Submission, after: Submission, + creator: Agent) -> Iterable[SetTitle]: + yield SetTitle(creator=creator, title=f"(╯°□°)╯︵ ┻━┻ {event.title}") + + +.. 
note:: + Callbacks should **only** be used for actions that are specific to the + domain/concerns of the service in which they are implemented. For processes + that apply to all submissions, including asynchronous processes, + see :mod:`agent`. + + +Finally, :mod:`.services.classic` provides integration with the classic +submission database. We use the classic database to store events (new table), +and also keep its legacy tables up to date so that other legacy components +continue to work as expected. + + +Using commands/events +===================== + +Command/event classes are defined in :mod:`arxiv.submission.domain.event`, and +are accessible from the root namespace of this package. Each event type defines +a transformation/operation on a single submission, and defines the data +required to perform that operation. Events are played forward, in order, to +derive the state of a submission. For more information about how event types +are defined, see :class:`arxiv.submission.domain.event.Event`. + +.. note:: + + One major difference between the event stream and the classic submission + database table is that in the former model, there is only one submission id + for all versions/mutations. In the legacy system, new rows are created in + the submission table for things like creating a replacement, adding a DOI, + or requesting a withdrawal. The :ref:`legacy-integration` handles the + interchange between these two models. + +Command/event types are `PEP 557 data classes +<https://www.python.org/dev/peps/pep-0557/>`_. Each command/event inherits from +:class:`.Event`, and may add additional fields. See :class:`.Event` for more +information about common fields. + +To create a new command/event, initialize the class with the relevant +data, and commit it using :func:`.save`. For example: + +.. 
code-block:: python + + >>> from arxiv.submission import User, SetTitle, save + >>> user = User(123, "joe@bloggs.com") + >>> update = SetTitle(creator=user, title='A new theory of foo') + >>> submission, events = save(update, submission_id=12345) + + +If the commands/events are for a submission that already exists, the latest +state of that submission will be obtained by playing forward past events. New +events will be validated and applied to the submission in the order that they +were passed to :func:`.save`. + +- If an event is invalid (e.g. the submission is not in an appropriate state + for the operation), an :class:`.InvalidEvent` exception will be raised. + Note that at this point nothing has been changed in the database; the + attempt is simply abandoned. +- The command/event is stored, as is the latest state of the + submission. Events and the resulting state of the submission are stored + atomically. +- If the notification service is configured, a message about the event is + propagated as a Kinesis event on the configured stream. See + :mod:`arxiv.submission.services.notification` for details. + +Special case: creation +---------------------- +Note that if the first event is a :class:`.CreateSubmission` the +submission ID need not be provided, as we won't know what it is yet. For +example: + +.. code-block:: python + + from arxiv.submission import User, CreateSubmission, SetTitle, save + + >>> user = User(123, "joe@bloggs.com") + >>> creation = CreateSubmission(creator=user) + >>> update = SetTitle(creator=user, title='A new theory of foo') + >>> submission, events = save(creation, update) + >>> submission.submission_id + 40032 + + +.. _versioning-overview: + +Versioning events +================= +Handling changes to this software in a way that does not break past data is a +non-trivial problem. 
In a traditional relational database arrangement we would +leverage a database migration tool to do things like apply ``ALTER`` statements +to tables when upgrading software versions. The premise of the event data +model, however, is that events are immutable -- we won't be going back to +modify past events whenever we make a change to the software. + +The strategy for version management around event data is implemented in +:mod:`arxiv.submission.domain.event.versioning`. When event data are stored, +they are tagged with the current version of this software. When +event data are loaded from the store in this software, prior to instantiating +the appropriate :class:`.Event` subclass, the data are mapped to the current +software version using any defined version mappings for that event type. +This happens on the fly, in :func:`.domain.event.event_factory`. + + +.. _legacy-integration: + +Integration with the legacy system +================================== +The :mod:`.classic` service module provides integration with the classic +database. See the documentation for that module for details. As we migrate +off of the classic database, we will swap in a new service module with the +same API. + +Until all legacy components that read from or write to the classic database are +replaced, we will not be able to move entirely away from the legacy submission +database. Particularly in the submission and moderation UIs, design has assumed +immediate consistency, which means a conventional read/write interaction with +the database. Hence the classic integration module assumes that we are reading +and writing events and submission state from/to the same database. + +As development proceeds, we will look for opportunities to decouple from the +classic database, and focus on more localized projections of submission events +that are specific to a service/application. 
For example, the moderation UI/API +need not maintain or have access to the complete representation of the +submission; instead, it may track the subset of events relevant to its +operation (e.g. pertaining to metadata, classification, proposals, holds, etc). + +""" +import os +import time +from typing import Any +from typing_extensions import Protocol + +from flask import Flask, Blueprint + +from arxiv.base import logging + +from .domain.event import * +from .core import * +from .domain.submission import Submission, SubmissionMetadata, Author +from .domain.agent import Agent, User, System, Client +from .services import classic, StreamPublisher, Compiler, PlainTextService,\ + Classifier + +logger = logging.getLogger(__name__) + + +def init_app(app: Flask) -> None: + """ + Configure a Flask app to use this package. + + Initializes and waits for :class:`.StreamPublisher` and :mod:`.classic` + to be available. + """ + # Initialize services. + StreamPublisher.init_app(app) + classic.init_app(app) + + template_folder = os.path.join(os.path.dirname(os.path.realpath(__file__)), + 'templates') + app.register_blueprint( + Blueprint('submission-core', __name__, template_folder=template_folder) + ) + + if app.config['WAIT_FOR_SERVICES']: + time.sleep(app.config['WAIT_ON_STARTUP']) + with app.app_context(): + stream_publisher = StreamPublisher.current_session() + stream_publisher.initialize() + wait_for(stream_publisher) + wait_for(classic) + logger.info('All upstream services are available; ready to start') + + +class IAwaitable(Protocol): + """An object that provides an ``is_available`` predicate.""" + + def is_available(self, **kwargs: Any) -> bool: + """Check whether an object (e.g. a service) is available.""" + ... 
+ + +def wait_for(service: IAwaitable, delay: int = 2, **extra: Any) -> None: + """Wait for a service to become available.""" + if hasattr(service, '__name__'): + service_name = service.__name__ # type: ignore + elif hasattr(service, '__class__'): + service_name = service.__class__.__name__ + else: + service_name = str(service) + + logger.info('await %s', service_name) + while not service.is_available(**extra): + logger.info('service %s is not available; try again', service_name) + time.sleep(delay) + logger.info('service %s is available!', service_name) diff --git a/core/arxiv/submission/auth.py b/core/arxiv/submission/auth.py new file mode 100644 index 0000000..491fc0f --- /dev/null +++ b/core/arxiv/submission/auth.py @@ -0,0 +1,43 @@ + +from typing import List +import uuid +from datetime import datetime, timedelta +from pytz import UTC + +from arxiv.users import auth, domain +from arxiv.base.globals import get_application_config + +from .domain.agent import User, Agent, Client + + +def get_system_token(name: str, agent: Agent, scopes: List[str]) -> str: + start = datetime.now(tz=UTC) + end = start + timedelta(seconds=36000) + if isinstance(agent, User): + user = domain.User( + username=agent.username, + email=agent.email, + user_id=agent.identifier, + name=agent.name, + verified=True + ) + else: + user = None + session = domain.Session( + session_id=str(uuid.uuid4()), + start_time=datetime.now(), end_time=end, + user=user, + client=domain.Client( + owner_id='system', + client_id=name, + name=name + ), + authorizations=domain.Authorizations(scopes=scopes) + ) + return auth.tokens.encode(session, get_application_config()['JWT_SECRET']) + + +def get_compiler_scopes(resource: str) -> List[str]: + """Get minimal auth scopes necessary for compilation integration.""" + return [auth.scopes.READ_COMPILE.for_resource(resource), + auth.scopes.CREATE_COMPILE.for_resource(resource)] diff --git a/core/arxiv/submission/config.py b/core/arxiv/submission/config.py new file mode 
100644 index 0000000..4683d53 --- /dev/null +++ b/core/arxiv/submission/config.py @@ -0,0 +1,299 @@ +"""Submission core configuration parameters.""" + +from os import environ +import warnings +from kombu.serialization import register +from .serializer import dumps, loads + +NAMESPACE = environ.get('NAMESPACE') +"""Namespace in which this service is deployed; to qualify keys for secrets.""" + +LOGLEVEL = int(environ.get('LOGLEVEL', '20')) +""" +Logging verbosity. + +See `https://docs.python.org/3/library/logging.html#levels`_. +""" + +JWT_SECRET = environ.get('JWT_SECRET') +"""Secret key for signing + verifying authentication JWTs.""" + +if not JWT_SECRET: + warnings.warn('JWT_SECRET is not set; authn/z may not work correctly!') + +CORE_VERSION = "0.0.0" + +MAX_SAVE_RETRIES = 25 +"""Number of times to retry storing/emitting a submission event.""" + +DEFAULT_SAVE_RETRY_DELAY = 30 +"""Delay between retry attempts when storing/emitting a submission event.""" + +WAIT_FOR_SERVICES = bool(int(environ.get('WAIT_FOR_SERVICES', '0'))) +"""Disable/enable waiting for upstream services to be available on startup.""" +if not WAIT_FOR_SERVICES: + warnings.warn('Awaiting upstream services is disabled; this should' + ' probably be enabled in production.') + +WAIT_ON_STARTUP = int(environ.get('WAIT_ON_STARTUP', '0')) +"""Number of seconds to wait before checking upstream services on startup.""" + +ENABLE_CALLBACKS = bool(int(environ.get('ENABLE_CALLBACKS', '1'))) +"""Enable/disable the :func:`Event.bind` feature.""" + + +# --- DATABASE CONFIGURATION --- + +CLASSIC_DATABASE_URI = environ.get('CLASSIC_DATABASE_URI', 'sqlite:///') +"""Full database URI for the classic system.""" + +SQLALCHEMY_DATABASE_URI = CLASSIC_DATABASE_URI +"""Full database URI for the classic system.""" + +SQLALCHEMY_TRACK_MODIFICATIONS = False +"""Track modifications feature should always be disabled.""" + +# --- AWS CONFIGURATION --- + +AWS_ACCESS_KEY_ID = environ.get('AWS_ACCESS_KEY_ID', 'nope') +""" +Access key 
for requests to AWS services. + +If :const:`VAULT_ENABLED` is ``True``, this will be overwritten. +""" + +AWS_SECRET_ACCESS_KEY = environ.get('AWS_SECRET_ACCESS_KEY', 'nope') +""" +Secret auth key for requests to AWS services. + +If :const:`VAULT_ENABLED` is ``True``, this will be overwritten. +""" + +AWS_REGION = environ.get('AWS_REGION', 'us-east-1') +"""Default region for calling AWS services.""" + + +# --- KINESIS CONFIGURATION --- + +KINESIS_STREAM = environ.get("KINESIS_STREAM", "SubmissionEvents") +"""Name of the stream on which to produce and consume events.""" + +KINESIS_SHARD_ID = environ.get("KINESIS_SHARD_ID", "0") +"""Shard ID for stream producer.""" + +KINESIS_ENDPOINT = environ.get("KINESIS_ENDPOINT", None) +""" +Alternate endpoint for connecting to Kinesis. + +If ``None``, uses the boto3 defaults for the :const:`AWS_REGION`. This is here +mainly to support development with localstack or other mocking frameworks. +""" + +KINESIS_VERIFY = bool(int(environ.get("KINESIS_VERIFY", "1"))) +""" +Enable/disable TLS certificate verification when connecting to Kinesis. + +This supports development with localstack or other mocking frameworks. +""" + +if not KINESIS_VERIFY: + warnings.warn('Certificate verification for Kinesis is disabled; this' + ' should not be disabled in production.') + +# --- UPSTREAM SERVICE INTEGRATIONS --- +# +# See https://kubernetes.io/docs/concepts/services-networking/service/#environment-variables +# for details on service DNS and environment variables in k8s. + +# Integration with the file manager service. 
+FILEMANAGER_HOST = environ.get('FILEMANAGER_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the filemanager service.""" + +FILEMANAGER_PORT = environ.get('FILEMANAGER_SERVICE_PORT', '443') +"""Port for the filemanager service.""" + +FILEMANAGER_PROTO = environ.get(f'FILEMANAGER_PORT_{FILEMANAGER_PORT}_PROTO', + 'https') +"""Protocol for the filemanager service.""" + +FILEMANAGER_PATH = environ.get('FILEMANAGER_PATH', '').lstrip('/') +"""Path at which the filemanager service is deployed.""" + +FILEMANAGER_ENDPOINT = environ.get( + 'FILEMANAGER_ENDPOINT', + '%s://%s:%s/%s' % (FILEMANAGER_PROTO, FILEMANAGER_HOST, + FILEMANAGER_PORT, FILEMANAGER_PATH) +) +""" +Full URL to the root filemanager service API endpoint. + +If not explicitly provided, this is composed from :const:`FILEMANAGER_HOST`, +:const:`FILEMANAGER_PORT`, :const:`FILEMANAGER_PROTO`, and +:const:`FILEMANAGER_PATH`. +""" + +FILEMANAGER_VERIFY = bool(int(environ.get('FILEMANAGER_VERIFY', '1'))) +"""Enable/disable SSL certificate verification for filemanager service.""" + +if FILEMANAGER_PROTO == 'https' and not FILEMANAGER_VERIFY: + warnings.warn('Certificate verification for filemanager is disabled; this' + ' should not be disabled in production.') + +# Integration with the compiler service. +COMPILER_HOST = environ.get('COMPILER_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the compiler service.""" + +COMPILER_PORT = environ.get('COMPILER_SERVICE_PORT', '443') +"""Port for the compiler service.""" + +COMPILER_PROTO = environ.get(f'COMPILER_PORT_{COMPILER_PORT}_PROTO', 'https') +"""Protocol for the compiler service.""" + +COMPILER_PATH = environ.get('COMPILER_PATH', '') +"""Path at which the compiler service is deployed.""" + +COMPILER_ENDPOINT = environ.get( + 'COMPILER_ENDPOINT', + '%s://%s:%s/%s' % (COMPILER_PROTO, COMPILER_HOST, COMPILER_PORT, + COMPILER_PATH) +) +""" +Full URL to the root compiler service API endpoint. 
+ +If not explicitly provided, this is composed from :const:`COMPILER_HOST`, +:const:`COMPILER_PORT`, :const:`COMPILER_PROTO`, and :const:`COMPILER_PATH`. +""" + +COMPILER_VERIFY = bool(int(environ.get('COMPILER_VERIFY', '1'))) +"""Enable/disable SSL certificate verification for compiler service.""" + +if COMPILER_PROTO == 'https' and not COMPILER_VERIFY: + warnings.warn('Certificate verification for compiler is disabled; this' + ' should not be disabled in production.') + +# Integration with the classifier service. +CLASSIFIER_HOST = environ.get('CLASSIFIER_SERVICE_HOST', 'localhost') +"""Hostname or address of the classifier service.""" + +CLASSIFIER_PORT = environ.get('CLASSIFIER_SERVICE_PORT', '8000') +"""Port for the classifier service.""" + +CLASSIFIER_PROTO = environ.get(f'CLASSIFIER_PORT_{CLASSIFIER_PORT}_PROTO', + 'http') +"""Protocol for the classifier service.""" + +CLASSIFIER_PATH = environ.get('CLASSIFIER_PATH', '/classifier/') +"""Path at which the classifier service is deployed.""" + +CLASSIFIER_ENDPOINT = environ.get( + 'CLASSIFIER_ENDPOINT', + '%s://%s:%s/%s' % (CLASSIFIER_PROTO, CLASSIFIER_HOST, CLASSIFIER_PORT, + CLASSIFIER_PATH) +) +""" +Full URL to the root classifier service API endpoint. + +If not explicitly provided, this is composed from :const:`CLASSIFIER_HOST`, +:const:`CLASSIFIER_PORT`, :const:`CLASSIFIER_PROTO`, and +:const:`CLASSIFIER_PATH`. +""" + +CLASSIFIER_VERIFY = bool(int(environ.get('CLASSIFIER_VERIFY', '0'))) +"""Enable/disable SSL certificate verification for classifier service.""" + +if CLASSIFIER_PROTO == 'https' and not CLASSIFIER_VERIFY: + warnings.warn('Certificate verification for classifier is disabled; this' + ' should not be disabled in production.') + +# Integration with plaintext extraction service. 
+PLAINTEXT_HOST = environ.get('PLAINTEXT_SERVICE_HOST', 'arxiv.org') +"""Hostname or address of the plaintext extraction service.""" + +PLAINTEXT_PORT = environ.get('PLAINTEXT_SERVICE_PORT', '443') +"""Port for the plaintext extraction service.""" + +PLAINTEXT_PROTO = environ.get(f'PLAINTEXT_PORT_{PLAINTEXT_PORT}_PROTO', + 'https') +"""Protocol for the plaintext extraction service.""" + +PLAINTEXT_PATH = environ.get('PLAINTEXT_PATH', '') +"""Path at which the plaintext extraction service is deployed.""" + +PLAINTEXT_ENDPOINT = environ.get( + 'PLAINTEXT_ENDPOINT', + '%s://%s:%s/%s' % (PLAINTEXT_PROTO, PLAINTEXT_HOST, PLAINTEXT_PORT, + PLAINTEXT_PATH) +) +""" +Full URL to the root plaintext extraction service API endpoint. + +If not explicitly provided, this is composed from :const:`PLAINTEXT_HOST`, +:const:`PLAINTEXT_PORT`, :const:`PLAINTEXT_PROTO`, and :const:`PLAINTEXT_PATH`. +""" + +PLAINTEXT_VERIFY = bool(int(environ.get('PLAINTEXT_VERIFY', '1'))) +"""Enable/disable certificate verification for plaintext extraction service.""" + +if PLAINTEXT_PROTO == 'https' and not PLAINTEXT_VERIFY: + warnings.warn('Certificate verification for plaintext extraction service' + ' is disabled; this should not be disabled in production.') + +# Email notification configuration. +EMAIL_ENABLED = bool(int(environ.get('EMAIL_ENABLED', '1'))) +"""Enable/disable sending e-mail. 
Default is enabled (True).""" + +DEFAULT_SENDER = environ.get('DEFAULT_SENDER', 'noreply@arxiv.org') +"""Default sender address for e-mail.""" + +SUPPORT_EMAIL = environ.get('SUPPORT_EMAIL', "help@arxiv.org") +"""E-mail address for user support.""" + +SMTP_HOSTNAME = environ.get('SMTP_HOSTNAME', 'localhost') +"""Hostname for the SMTP server.""" + +SMTP_USERNAME = environ.get('SMTP_USERNAME', 'foouser') +"""Username for the SMTP server.""" + +SMTP_PASSWORD = environ.get('SMTP_PASSWORD', 'foopass') +"""Password for the SMTP server.""" + +SMTP_PORT = int(environ.get('SMTP_PORT', '0')) +"""SMTP service port.""" + +SMTP_LOCAL_HOSTNAME = environ.get('SMTP_LOCAL_HOSTNAME', None) +"""Local host name to include in SMTP request.""" + +SMTP_SSL = bool(int(environ.get('SMTP_SSL', '0'))) +"""Enable/disable SSL for SMTP. Default is disabled.""" + +if not SMTP_SSL: + warnings.warn('Certificate verification for SMTP is disabled; this' + ' should not be disabled in production.') + + +# --- URL GENERATION --- + +EXTERNAL_URL_SCHEME = environ.get('EXTERNAL_URL_SCHEME', 'https') +"""Scheme to use for external URLs.""" + +if EXTERNAL_URL_SCHEME != 'https': + warnings.warn('External URLs will not use HTTPS proto') + +BASE_SERVER = environ.get('BASE_SERVER', 'arxiv.org') +"""Base arXiv server.""" + +SERVER_NAME = environ.get('SERVER_NAME', "submit.arxiv.org") +"""The name of this server.""" + +URLS = [ + ("submission", "/", SERVER_NAME), + ("confirmation", "//confirmation", SERVER_NAME) +] +""" +URLs for external services, for use with :func:`flask.url_for`. + +This subset of URLs is common only within submit, for now - maybe move to base +if these pages seem relevant to other services. + +For details, see :mod:`arxiv.base.urls`. 
+""" diff --git a/core/arxiv/submission/core.py b/core/arxiv/submission/core.py new file mode 100644 index 0000000..81e3876 --- /dev/null +++ b/core/arxiv/submission/core.py @@ -0,0 +1,201 @@ +"""Core persistence methods for submissions and submission events.""" + +from typing import Callable, List, Dict, Mapping, Tuple, Iterable, Optional +from functools import wraps +from collections import defaultdict +from datetime import datetime +from pytz import UTC + +from flask import Flask + +from arxiv.base import logging +from arxiv.base.globals import get_application_config, get_application_global + +from .domain.submission import Submission, SubmissionMetadata, Author +from .domain.agent import Agent, User, System, Client +from .domain.event import Event, CreateSubmission +from .services import classic, StreamPublisher +from .exceptions import InvalidEvent, NoSuchSubmission, SaveError, NothingToDo + + +logger = logging.getLogger(__name__) + + +def load(submission_id: int) -> Tuple[Submission, List[Event]]: + """ + Load a submission and its history. + + This loads all events for the submission, and generates the most + up-to-date representation based on those events. + + Parameters + ---------- + submission_id : str + Submission identifier. + + Returns + ------- + :class:`.domain.submission.Submission` + The current state of the submission. + list + Items are :class:`.Event` instances, in order of their occurrence. + + Raises + ------ + :class:`arxiv.submission.exceptions.NoSuchSubmission` + Raised when a submission with the passed ID cannot be found. + + """ + try: + with classic.transaction(): + return classic.get_submission(submission_id) + except classic.NoSuchSubmission as e: + raise NoSuchSubmission(f'No submission with id {submission_id}') from e + + +def load_submissions_for_user(user_id: int) -> List[Submission]: + """ + Load active :class:`.domain.submission.Submission` for a specific user. 
+ + Parameters + ---------- + user_id : int + Unique identifier for the user. + + Returns + ------- + list + Items are :class:`.domain.submission.Submission` instances. + + """ + with classic.transaction(): + return classic.get_user_submissions_fast(user_id) + + +def load_fast(submission_id: int) -> Submission: + """ + Load a :class:`.domain.submission.Submission` from its projected state. + + This does not load and apply past events. The most recent stored submission + state is loaded directly from the database. + + Parameters + ---------- + submission_id : str + Submission identifier. + + Returns + ------- + :class:`.domain.submission.Submission` + The current state of the submission. + + """ + try: + with classic.transaction(): + return classic.get_submission_fast(submission_id) + except classic.NoSuchSubmission as e: + raise NoSuchSubmission(f'No submission with id {submission_id}') from e + + +def save(*events: Event, submission_id: Optional[str] = None) \ + -> Tuple[Submission, List[Event]]: + """ + Commit a set of new :class:`.Event` instances for a submission. + + This will persist the events to the database, along with the final + state of the submission, and generate external notification(s) on the + appropriate channels. + + Parameters + ---------- + events : :class:`.Event` + Events to apply and persist. + submission_id : int + The unique ID for the submission, if available. If not provided, it is + expected that ``events`` includes a :class:`.CreateSubmission`. + + Returns + ------- + :class:`arxiv.submission.domain.submission.Submission` + The state of the submission after all events (including rule-derived + events) have been applied. Updated with the submission ID, if a + :class:`.CreateSubmission` was included. + list + A list of :class:`.Event` instances applied to the submission. Note + that this list may contain more events than were passed, if event + rules were triggered. 
+ + Raises + ------ + :class:`arxiv.submission.exceptions.NoSuchSubmission` + Raised if ``submission_id`` is not provided and the first event is not + a :class:`.CreateSubmission`, or ``submission_id`` is provided but + no such submission exists. + :class:`.InvalidEvent` + If an invalid event is encountered, the entire operation is aborted + and this exception is raised. + :class:`.SaveError` + There was a problem persisting the events and/or submission state + to the database. + + """ + if len(events) == 0: + raise NothingToDo('Must pass at least one event') + events = list(events) # Coerce to list so that we can index. + prior: List[Event] = [] + before: Optional[Submission] = None + + # We need ACIDity surrounding the validation and persistence of new + # events. + with classic.transaction(): + # Get the current state of the submission from past events. Normally we + # would not want to load all past events, but legacy components may be + # active, and the legacy projected state does not capture all of the + # detail in the event model. + if submission_id is not None: + # This will create a shared lock on the submission rows while we + # are working with them. + before, prior = classic.get_submission(submission_id, + for_update=True) + + # Either we need a submission ID, or the first event must be a + # creation. + elif events[0].submission_id is None \ + and not isinstance(events[0], CreateSubmission): + raise NoSuchSubmission('Unable to determine submission') + + committed: List[Event] = [] + for event in events: + # Fill in submission IDs, if they are missing. + if event.submission_id is None and submission_id is not None: + event.submission_id = submission_id + + # The created timestamp should be roughly when the event was + # committed. Since the event projection may refer to its own ID + # (which is based on the creation time), this must be set before + # the event is applied. 
+ event.created = datetime.now(UTC) + # Mutation happens here; raises InvalidEvent. + logger.debug('Apply event %s: %s', event.event_id, event.NAME) + after = event.apply(before) + committed.append(event) + if not event.committed: + after, consequent_events = event.commit(_store_event) + committed += consequent_events + + before = after # Prepare for the next event. + + all_ = sorted(set(prior) | set(committed), key=lambda e: e.created) + return after, list(all_) + + +def _store_event(event, before, after) -> Tuple[Event, Submission]: + return classic.store_event(event, before, after, StreamPublisher.put) + + +def init_app(app: Flask) -> None: + """Set default configuration parameters for an application instance.""" + classic.init_app(app) + StreamPublisher.init_app(app) + app.config.setdefault('ENABLE_CALLBACKS', 0) + app.config.setdefault('ENABLE_ASYNC', 0) diff --git a/core/arxiv/submission/domain/__init__.py b/core/arxiv/submission/domain/__init__.py new file mode 100644 index 0000000..a393400 --- /dev/null +++ b/core/arxiv/submission/domain/__init__.py @@ -0,0 +1,10 @@ +"""Core data structures for the submission and moderation system.""" + +from .submission import Submission, SubmissionMetadata, Author, Hold, \ + WithdrawalRequest, UserRequest, CrossListClassificationRequest, \ + Compilation, SubmissionContent +from .agent import User, System, Client, Agent, agent_factory +from .event import event_factory, Event +from .annotation import Comment +from .proposal import Proposal +from .meta import Category, License, Classification diff --git a/core/arxiv/submission/domain/agent.py b/core/arxiv/submission/domain/agent.py new file mode 100644 index 0000000..698cdab --- /dev/null +++ b/core/arxiv/submission/domain/agent.py @@ -0,0 +1,134 @@ +"""Data structures for agents.""" + +import hashlib +from typing import Any, Optional, List, Union + +from dataclasses import dataclass, field +from dataclasses import asdict + +from .meta import Classification + +__all__ = 
('Agent', 'User', 'System', 'Client', 'agent_factory') + + +@dataclass +class Agent: + """ + Base class for agents in the submission system. + + An agent is an actor/system that generates/is responsible for events. + """ + + native_id: str + """Type-specific identifier for the agent. This might be an URI.""" + + def __post_init__(self): + """Set derivative fields.""" + self.agent_type = self.__class__.get_agent_type() + self.agent_identifier = self.get_agent_identifier() + + @classmethod + def get_agent_type(cls) -> str: + """Get the name of the instance's class.""" + return cls.__name__ + + def get_agent_identifier(self) -> str: + """ + Get the unique identifier for this agent instance. + + Based on both the agent type and native ID. + """ + h = hashlib.new('sha1') + h.update(b'%s:%s' % (self.agent_type.encode('utf-8'), + str(self.native_id).encode('utf-8'))) + return h.hexdigest() + + def __eq__(self, other: Any) -> bool: + """Equality comparison for agents based on type and identifier.""" + if not isinstance(other, self.__class__): + return False + return self.agent_identifier == other.agent_identifier + + +@dataclass +class User(Agent): + """An (human) end user.""" + + email: str = field(default_factory=str) + username: str = field(default_factory=str) + forename: str = field(default_factory=str) + surname: str = field(default_factory=str) + suffix: str = field(default_factory=str) + name: str = field(default_factory=str) + identifier: Optional[str] = field(default=None) + affiliation: str = field(default_factory=str) + hostname: Optional[str] = field(default=None) + """Hostname or IP address from which user requests are originating.""" + + endorsements: List[str] = field(default_factory=list) + agent_type: str = field(default_factory=str) + agent_identifier: str = field(default_factory=str) + + def __post_init__(self): + """Set derivative fields.""" + self.name = self.get_name() + self.agent_type = self.get_agent_type() + + def get_name(self): + """Full name 
of the user.""" + return f"{self.forename} {self.surname} {self.suffix}" + + +# TODO: extend this to support arXiv-internal services. +@dataclass +class System(Agent): + """The submission application (this application).""" + + agent_type: str = field(default_factory=str) + agent_identifier: str = field(default_factory=str) + username: str = field(default_factory=str) + hostname: str = field(default_factory=str) + + def __post_init__(self): + """Set derivative fields.""" + super(System, self).__post_init__() + self.username = self.native_id + self.hostname = self.native_id + self.agent_type = self.get_agent_type() + + +@dataclass +class Client(Agent): + """A non-human third party, usually an API client.""" + + hostname: Optional[str] = field(default=None) + """Hostname or IP address from which client requests are originating.""" + + agent_type: str = field(default_factory=str) + agent_identifier: str = field(default_factory=str) + + def __post_init__(self): + """Set derivative fields.""" + self.agent_type = self.get_agent_type() + + +_agent_types = { + User.get_agent_type(): User, + System.get_agent_type(): System, + Client.get_agent_type(): Client, +} + + +def agent_factory(**data: Union[Agent, dict]) -> Agent: + """Instantiate a subclass of :class:`.Agent`.""" + if isinstance(data, Agent): + return data + agent_type = data.pop('agent_type') + native_id = data.pop('native_id') + if not agent_type or not native_id: + raise ValueError('No such agent: %s, %s' % (agent_type, native_id)) + if agent_type not in _agent_types: + raise ValueError(f'No such agent type: {agent_type}') + klass = _agent_types[agent_type] + data = {k: v for k, v in data.items() if k in klass.__dataclass_fields__} + return klass(native_id, **data) diff --git a/core/arxiv/submission/domain/annotation.py b/core/arxiv/submission/domain/annotation.py new file mode 100644 index 0000000..8214655 --- /dev/null +++ b/core/arxiv/submission/domain/annotation.py @@ -0,0 +1,108 @@ +""" +Provides 
quality-assurance annotations for the submission & moderation system. +""" + +from typing import Optional, Union, List +from datetime import datetime +import hashlib +from enum import Enum +from mypy_extensions import TypedDict + +from dataclasses import dataclass, asdict, field + +from arxiv.taxonomy import Category + +from .util import get_tzaware_utc_now +from .agent import Agent, agent_factory + + +@dataclass +class Comment: + """A freeform textual annotation.""" + + event_id: str + creator: Agent + created: datetime + proxy: Optional[Agent] = field(default=None) + body: str = field(default_factory=str) + + def __post_init__(self): + """Check our agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + + +ClassifierResult = TypedDict('ClassifierResult', + {'category': Category, 'probability': float}) + + +@dataclass +class Annotation: + def __post_init__(self): + """Check our agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + + +@dataclass +class ClassifierResults(Annotation): + """Represents suggested classifications from an auto-classifier.""" + + class Classifiers(Enum): + """Supported classifiers.""" + + CLASSIC = "classic" + + event_id: str + creator: Agent + created: datetime + proxy: Optional[Agent] = field(default=None) + classifier: Classifiers = field(default=Classifiers.CLASSIC) + results: List[ClassifierResult] = field(default_factory=list) + annotation_type: str = field(default='ClassifierResults') + + def __post_init__(self): + """Check our enums.""" + super(ClassifierResults, self).__post_init__() + self.classifier = self.Classifiers(self.classifier) + + +@dataclass +class Feature(Annotation): + """Represents features drawn from the content of the submission.""" 
+ + class Type(Enum): + """Supported features.""" + + CHARACTER_COUNT = "chars" + PAGE_COUNT = "pages" + STOPWORD_COUNT = "stops" + STOPWORD_PERCENT = "%stop" + WORD_COUNT = "words" + + event_id: str + created: datetime + creator: Agent + feature_type: Type + proxy: Optional[Agent] = field(default=None) + feature_value: Union[int, float] = field(default=0) + annotation_type: str = field(default='Feature') + + def __post_init__(self): + """Check our enums.""" + super(Feature, self).__post_init__() + self.feature_type = self.Type(self.feature_type) + + +annotation_types = { + 'Feature': Feature, + 'ClassifierResults': ClassifierResults +} + + +def annotation_factory(**data) -> Annotation: + return annotation_types[data.pop('annotation_type')](**data) diff --git a/core/arxiv/submission/domain/compilation.py b/core/arxiv/submission/domain/compilation.py new file mode 100644 index 0000000..8285299 --- /dev/null +++ b/core/arxiv/submission/domain/compilation.py @@ -0,0 +1,138 @@ +"""Data structs related to compilation.""" + +from enum import Enum +from datetime import datetime +from typing import Optional, NamedTuple +from dataclasses import dataclass, field +import io + + +@dataclass +class Compilation: + """The state of a compilation attempt from the :mod:`.compiler` service.""" + + class Status(Enum): # type: ignore + """Acceptable compilation process statuses.""" + + SUCCEEDED = "completed" + IN_PROGRESS = "in_progress" + FAILED = "failed" + + class Format(Enum): # type: ignore + """Supported compilation output formats.""" + + PDF = "pdf" + DVI = "dvi" + PS = "ps" + + @property + def content_type(self): + """Get the MIME type for the compilation product.""" + _ctypes = { + self.PDF: 'application/pdf', + self.DVI: 'application/x-dvi', + self.PS: 'application/postscript' + } + return _ctypes[self] + + class SupportedCompiler(Enum): + """Compiler known to be supported by the compiler service.""" + + PDFLATEX = 'pdflatex' + + class Reason(Enum): + """Specific reasons for 
a (usually failure) outcome.""" + + AUTHORIZATION = "auth_error" + MISSING = "missing_source" + SOURCE_TYPE = "invalid_source_type" + CORRUPTED = "corrupted_source" + CANCELLED = "cancelled" + ERROR = "compilation_errors" + NETWORK = "network_error" + STORAGE = "storage" + DOCKER = 'docker' + NONE = None + + # Here are the actual slots/fields. + source_id: str + """This is the upload workspace identifier.""" + status: Status + """The status of the compilation.""" + checksum: str + """Checksum of the source package that we are compiling.""" + output_format: Format = field(default=Format.PDF) + """The requested output format.""" + reason: Reason = field(default=Reason.NONE) + """The specific reason for the :attr:`.status`.""" + description: Optional[str] = field(default=None) + """Additional detail about the :attr:`.status`.""" + size_bytes: int = field(default=0) + """The size of the compilation product in bytes.""" + start_time: Optional[datetime] = field(default=None) + end_time: Optional[datetime] = field(default=None) + + def __post_init__(self): + """Check enums.""" + self.output_format = self.Format(self.output_format) + self.reason = self.Reason(self.reason) + + @property + def identifier(self): + """Get the task identifier.""" + return self.get_identifier(self.source_id, self.checksum, + self.output_format) + + @staticmethod + def get_identifier(source_id: str, checksum: str, + output_format: Format = Format.PDF) -> str: + return f"{source_id}/{checksum}/{output_format.value}" + + @property + def content_type(self): + """Get the MIME type for the compilation product.""" + return self.output_format.content_type + + +@dataclass +class CompilationProduct: + """Content of a compilation product itself.""" + + stream: io.BytesIO + """Readable buffer with the product content.""" + + content_type: str + """MIME-type of the stream.""" + + status: Optional[Compilation] = field(default=None) + """Status information about the product.""" + + checksum: Optional[str] = 
field(default=None) + """The B64-encoded MD5 hash of the compilation product.""" + + def __post_init__(self): + """Check status.""" + if self.status and type(self.status) is dict: + self.status = Compilation(**self.status) + + +@dataclass +class CompilationLog: + """Content of a compilation log.""" + + stream: io.BytesIO + """Readable buffer with the product content.""" + + status: Optional[Compilation] = field(default=None) + """Status information about the log.""" + + checksum: Optional[str] = field(default=None) + """The B64-encoded MD5 hash of the log.""" + + content_type: str = field(default='text/plain') + """MIME-type of the stream.""" + + def __post_init__(self): + """Check status.""" + if self.status and type(self.status) is dict: + self.status = Compilation(**self.status) diff --git a/core/arxiv/submission/domain/event/__init__.py b/core/arxiv/submission/domain/event/__init__.py new file mode 100644 index 0000000..ed7edab --- /dev/null +++ b/core/arxiv/submission/domain/event/__init__.py @@ -0,0 +1,1262 @@ +""" +Data structures for submissions events. + +- Events have unique identifiers generated from their data (creation, agent, + submission). +- Events provide methods to update a submission based on the event data. +- Events provide validation methods for event data. + +Writing new events/commands +=========================== + +Events/commands are implemented as classes that inherit from :class:`.Event`. +It should: + +- Be a dataclass (i.e. be decorated with :func:`dataclasses.dataclass`). +- Define (using :func:`dataclasses.field`) associated data. +- Implement a validation method with the signature + ``validate(self, submission: Submission) -> None`` (see below). +- Implement a projection method with the signature + ``project(self, submission: Submission) -> Submission:`` that mutates + the passed :class:`.domain.submission.Submission` instance. 
+ The projection *must not* generate side-effects, because it will be called + any time we are generating the state of a submission. If you need to + generate a side-effect, see :ref:`callbacks`. +- Be fully documented. Be sure that the class docstring fully describes the + meaning of the event/command, and that both public and private methods have + at least a summary docstring. +- Have a corresponding :class:`unittest.TestCase` in + :mod:`arxiv.submission.domain.tests.test_events`. + +Adding validation to events +=========================== + +Each command/event class should implement an instance method +``validate(self, submission: Submission) -> None`` that raises +:class:`.InvalidEvent` exceptions if the data on the event instance is not +valid. + +For clarity, it's a good practice to individuate validation steps as separate +private instance methods, and call them from the public ``validate`` method. +This makes it easier to identify which validation criteria are being applied, +in what order, and what those criteria mean. + +See :class:`.SetPrimaryClassification` for an example. + +We could consider standalone validation functions for validation checks that +are performed on several event types (instead of just private instance +methods). + +.. _callbacks: + +Registering event callbacks +=========================== + +The base :class:`Event` provides support for callbacks that are executed when +an event instance is committed. To attach a callback to an event type, use the +:func:`Event.bind` decorator. For example: + +.. code-block:: python + + @SetTitle.bind() + def do_this_when_a_title_is_set(event, before, after, agent): + ... + return [] + + +Callbacks must have the signature ``(event: Event, before: Submission, +after: Submission, creator: Agent) -> Iterable[Event]``. ``event`` is the +event instance being committed that triggered the callback.
``before`` and +``after`` are the states of the submission before and after the event was +applied, respectively. ``creator`` is the agent responsible for any subsequent +events created by the callback, and should be used for that purpose. + +The callback should not concern itself with persistence; that is handled by +:func:`Event.commit`. Any mutations of submission should be made by returning +the appropriate command/event instances. + +The circumstances under which the callback is executed can be controlled by +passing a condition callable to the decorator. This should have the signature +``(event: Event, before: Submission, after: Submission, creator: Agent) -> +bool``; if it returns ``True``, the callback will be executed. For example: + +.. code-block:: python + + @SetTitle.bind(condition=lambda e, b, a, c: e.title == 'foo') + def do_this_when_a_title_is_set_to_foo(event, before, after, agent): + ... + return [] + + +When do things actually happen? +------------------------------- +Callbacks are triggered when the :func:`.commit` method is called, +usually by :func:`.core.save`. Normally, any event instances returned +by the callback are applied and committed right away, in order. + +Setting :mod:`.config.ENABLE_CALLBACKS=0` will disable callbacks +entirely.
+ +""" + +import hashlib +import re +import copy +from datetime import datetime +from collections import defaultdict +from functools import wraps +from pytz import UTC +from typing import Optional, TypeVar, List, Tuple, Any, Dict, Union, Iterable,\ + Callable, ClassVar, Mapping +from urllib.parse import urlparse +from dataclasses import field, asdict +from .util import dataclass +import bleach + +from arxiv.util import schema +from arxiv import taxonomy +from arxiv import identifier as arxiv_identifier +from arxiv.base import logging +from arxiv.base.globals import get_application_config + +from ..agent import Agent, System, agent_factory +from ..submission import Submission, SubmissionMetadata, Author, \ + Classification, License, Delegation, \ + SubmissionContent, WithdrawalRequest, CrossListClassificationRequest +from ..annotation import Comment, Feature, ClassifierResults, \ + ClassifierResult + +from ...exceptions import InvalidEvent +from ..util import get_tzaware_utc_now +from .base import Event, event_factory, EventType +from .request import RequestCrossList, RequestWithdrawal, ApplyRequest, \ + RejectRequest, ApproveRequest, CancelRequest +from . import validators +from .proposal import AddProposal, RejectProposal, AcceptProposal +from .flag import AddMetadataFlag, AddUserFlag, AddContentFlag, RemoveFlag, \ + AddHold, RemoveHold +from .process import AddProcessStatus + +logger = logging.getLogger(__name__) + + +# Events related to the creation of a new submission. +# +# These are largely the domain of the metadata API, and the submission UI. 
+ + +@dataclass() +class CreateSubmission(Event): + """Creation of a new :class:`.domain.submission.Submission`.""" + + NAME = "create submission" + NAMED = "submission created" + + def validate(self, *args, **kwargs) -> None: + """Validate creation of a submission.""" + return + + def project(self, submission: None = None) -> Submission: + """Create a new :class:`.domain.submission.Submission`.""" + return Submission(creator=self.creator, created=self.created, + owner=self.creator, proxy=self.proxy, + client=self.client) + + +@dataclass(init=False) +class CreateSubmissionVersion(Event): + """ + Creates a new version of a submission. + + Takes the submission back to "working" state; the user or client may make + additional changes before finalizing the submission. + """ + + NAME = "create a new version" + NAMED = "new version created" + + def validate(self, submission: Submission) -> None: + """Only applies to announced submissions.""" + if not submission.is_announced: + raise InvalidEvent(self, "Must already be announced") + validators.no_active_requests(self, submission) + + def project(self, submission: Submission) -> Submission: + """Increment the version number, and reset several fields.""" + submission.version += 1 + submission.status = Submission.WORKING + # Return these to default. 
+ submission.status = Submission.status + submission.source_content = Submission.source_content + submission.license = Submission.license + submission.submitter_is_author = Submission.submitter_is_author + submission.submitter_contact_verified = \ + Submission.submitter_contact_verified + submission.submitter_accepts_policy = \ + Submission.submitter_accepts_policy + submission.submitter_confirmed_preview = \ + Submission.submitter_confirmed_preview + return submission + + +@dataclass(init=False) +class Rollback(Event): + """Roll back to the most recent announced version, or delete.""" + + NAME = "roll back or delete" + NAMED = "rolled back or deleted" + + def validate(self, submission: Submission) -> None: + """Only applies to submissions in an unannounced state.""" + if submission.is_announced: + raise InvalidEvent(self, "Cannot already be announced") + elif submission.version > 1 and not submission.versions: + raise InvalidEvent(self, "No announced version to which to revert") + + def project(self, submission: Submission) -> Submission: + """Decrement the version number, and reset fields.""" + if submission.version == 1: + submission.status = Submission.DELETED + return submission + submission.version -= 1 + target = submission.versions[-1] + # Return these to last announced state. 
+ submission.status = target.status + submission.source_content = target.source_content + submission.submitter_contact_verified = \ + target.submitter_contact_verified + submission.submitter_accepts_policy = \ + target.submitter_accepts_policy + submission.submitter_confirmed_preview = \ + target.submitter_confirmed_preview + submission.license = target.license + submission.metadata = copy.deepcopy(target.metadata) + return submission + + +@dataclass(init=False) +class ConfirmContactInformation(Event): + """Submitter has verified their contact information.""" + + NAME = "confirm contact information" + NAMED = "contact information confirmed" + + def validate(self, submission: Submission) -> None: + """Cannot apply to a finalized submission.""" + validators.submission_is_not_finalized(self, submission) + + def project(self, submission: Submission) -> Submission: + """Update :attr:`.Submission.submitter_contact_verified`.""" + submission.submitter_contact_verified = True + return submission + + +@dataclass() +class ConfirmAuthorship(Event): + """The submitting user asserts whether they are an author of the paper.""" + + NAME = "confirm that submitter is an author" + NAMED = "submitter authorship status confirmed" + + submitter_is_author: bool = True + + def validate(self, submission: Submission) -> None: + """Cannot apply to a finalized submission.""" + validators.submission_is_not_finalized(self, submission) + + def project(self, submission: Submission) -> Submission: + """Update the authorship flag on the submission.""" + submission.submitter_is_author = self.submitter_is_author + return submission + + +@dataclass(init=False) +class ConfirmPolicy(Event): + """The submitting user accepts the arXiv submission policy.""" + + NAME = "confirm policy acceptance" + NAMED = "policy acceptance confirmed" + + def validate(self, submission: Submission) -> None: + """Cannot apply to a finalized submission.""" + validators.submission_is_not_finalized(self, submission) + + def 
project(self, submission: Submission) -> Submission: + """Set the policy flag on the submission.""" + submission.submitter_accepts_policy = True + return submission + + +@dataclass() +class SetPrimaryClassification(Event): + """Update the primary classification of a submission.""" + + NAME = "set primary classification" + NAMED = "primary classification set" + + category: Optional[taxonomy.Category] = None + + def validate(self, submission: Submission) -> None: + """Validate the primary classification category.""" + validators.must_be_a_valid_category(self, self.category, submission) + self._creator_must_be_endorsed(submission) + self._must_be_unannounced(submission) + validators.submission_is_not_finalized(self, submission) + validators.cannot_be_secondary(self, self.category, submission) + + def _must_be_unannounced(self, submission: Submission) -> None: + """Can only be set on the first version before publication.""" + if submission.arxiv_id is not None or submission.version > 1: + raise InvalidEvent(self, "Can only be set on the first version," + " before publication.") + + def _creator_must_be_endorsed(self, submission: Submission) -> None: + """Creator of this event must be endorsed for the category.""" + if isinstance(self.creator, System): + return + try: + archive = taxonomy.CATEGORIES[self.category]['in_archive'] + except KeyError: + archive = self.category + if self.category not in self.creator.endorsements \ + and f'{archive}.*' not in self.creator.endorsements \ + and '*.*' not in self.creator.endorsements: + raise InvalidEvent(self, f"Creator is not endorsed for" + f" {self.category}.") + + def project(self, submission: Submission) -> Submission: + """Set :attr:`.domain.Submission.primary_classification`.""" + clsn = Classification(category=self.category) + submission.primary_classification = clsn + return submission + + def __post_init__(self): + """Ensure that we have an :class:`arxiv.taxonomy.Category`.""" + super(SetPrimaryClassification, 
self).__post_init__() + if self.category and not isinstance(self.category, taxonomy.Category): + self.category = taxonomy.Category(self.category) + + +@dataclass() +class AddSecondaryClassification(Event): + """Add a secondary :class:`.Classification` to a submission.""" + + NAME = "add cross-list classification" + NAMED = "cross-list classification added" + + category: Optional[taxonomy.Category] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Validate the secondary classification category to add.""" + validators.must_be_a_valid_category(self, self.category, submission) + validators.cannot_be_primary(self, self.category, submission) + validators.cannot_be_secondary(self, self.category, submission) + + def project(self, submission: Submission) -> Submission: + """Add a :class:`.Classification` as a secondary classification.""" + classification = Classification(category=self.category) + submission.secondary_classification.append(classification) + return submission + + def __post_init__(self): + """Ensure that we have an :class:`arxiv.taxonomy.Category`.""" + super(AddSecondaryClassification, self).__post_init__() + if self.category and not isinstance(self.category, taxonomy.Category): + self.category = taxonomy.Category(self.category) + + +@dataclass() +class RemoveSecondaryClassification(Event): + """Remove secondary :class:`.Classification` from submission.""" + + NAME = "remove cross-list classification" + NAMED = "cross-list classification removed" + + category: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Validate the secondary classification category to remove.""" + validators.must_be_a_valid_category(self, self.category, submission) + self._must_already_be_present(submission) + validators.submission_is_not_finalized(self, submission) + + def project(self, submission: Submission) -> Submission: + """Remove from :attr:`.Submission.secondary_classification`.""" + 
submission.secondary_classification = [ + classn for classn in submission.secondary_classification + if not classn.category == self.category + ] + return submission + + def _must_already_be_present(self, submission: Submission) -> None: + """One cannot remove a secondary that is not actually set.""" + if self.category not in submission.secondary_categories: + raise InvalidEvent(self, 'No such category on submission') + + +@dataclass() +class SetLicense(Event): + """The submitter has selected a license for their submission.""" + + NAME = "select distribution license" + NAMED = "distribution license selected" + + license_name: Optional[str] = field(default=None) + license_uri: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Validate the selected license.""" + validators.submission_is_not_finalized(self, submission) + + def project(self, submission: Submission) -> Submission: + """Set :attr:`.domain.Submission.license`.""" + submission.license = License( + name=self.license_name, + uri=self.license_uri + ) + return submission + + +@dataclass() +class SetTitle(Event): + """Update the title of a submission.""" + + NAME = "update title" + NAMED = "title updated" + + title: str = field(default='') + + MIN_LENGTH = 5 + MAX_LENGTH = 240 + ALLOWED_HTML = ["br", "sup", "sub", "hr", "em", "strong", "h"] + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetTitle, self).__post_init__() + self.title = self.cleanup(self.title) + + def validate(self, submission: Submission) -> None: + """Validate the title value.""" + validators.submission_is_not_finalized(self, submission) + self._does_not_contain_html_escapes(submission) + self._acceptable_length(submission) + validators.no_trailing_period(self, submission, self.title) + if self.title.isupper(): + raise InvalidEvent(self, "Title must not be all-caps") + self._check_for_html(submission) + + def project(self, submission: Submission) -> 
Submission: + """Update the title on a :class:`.domain.submission.Submission`.""" + submission.metadata.title = self.title + return submission + + def _does_not_contain_html_escapes(self, submission: Submission) -> None: + """The title must not contain HTML escapes.""" + if re.search(r"\&(?:[a-z]{3,4}|#x?[0-9a-f]{1,4})\;", self.title): + raise InvalidEvent(self, "Title may not contain HTML escapes") + + def _acceptable_length(self, submission: Submission) -> None: + """Verify that the title is an acceptable length.""" + N = len(self.title) + if N < self.MIN_LENGTH or N > self.MAX_LENGTH: + raise InvalidEvent(self, f"Title must be between {self.MIN_LENGTH}" + f" and {self.MAX_LENGTH} characters") + + # In classic, this is only an admin post-hoc check. + def _check_for_html(self, submission: Submission) -> None: + """Check for disallowed HTML.""" + N = len(self.title) + N_after = len(bleach.clean(self.title, tags=self.ALLOWED_HTML, + strip=True)) + if N > N_after: + raise InvalidEvent(self, "Title contains unacceptable HTML tags") + + @staticmethod + def cleanup(value: str) -> str: + """Perform some light tidying on the title.""" + value = re.sub(r"\s+", " ", value).strip() # Single spaces only. 
+ return value + + +@dataclass() +class SetAbstract(Event): + """Update the abstract of a submission.""" + + NAME = "update abstract" + NAMED = "abstract updated" + + abstract: str = field(default='') + + MIN_LENGTH = 20 + MAX_LENGTH = 1920 + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetAbstract, self).__post_init__() + self.abstract = self.cleanup(self.abstract) + + def validate(self, submission: Submission) -> None: + """Validate the abstract value.""" + validators.submission_is_not_finalized(self, submission) + self._acceptable_length(submission) + + def project(self, submission: Submission) -> Submission: + """Update the abstract on a :class:`.domain.submission.Submission`.""" + submission.metadata.abstract = self.abstract + return submission + + def _acceptable_length(self, submission: Submission) -> None: + N = len(self.abstract) + if N < self.MIN_LENGTH or N > self.MAX_LENGTH: + raise InvalidEvent(self, + f"Abstract must be between {self.MIN_LENGTH}" + f" and {self.MAX_LENGTH} characters") + + @staticmethod + def cleanup(value: str) -> str: + """Perform some light tidying on the abstract.""" + value = value.strip() # Remove leading or trailing spaces + # Tidy paragraphs which should be indicated with "\n ". + value = re.sub(r"[ ]+\n", "\n", value) + value = re.sub(r"\n\s+", "\n ", value) + # Newline with no following space is removed, so treated as just a + # space in paragraph. + value = re.sub(r"(\S)\n(\S)", "\g<1> \g<2>", value) + # Tab->space, multiple spaces->space. + value = re.sub(r"\t", " ", value) + value = re.sub(r"(?", value) + # Remove lone period. 
+ value = re.sub(r"\n\.\n", "\n", value) + value = re.sub(r"\n\.$", "", value) + return value + + +@dataclass() +class SetDOI(Event): + """Update the external DOI of a submission.""" + + NAME = "add a DOI" + NAMED = "DOI added" + + doi: str = field(default='') + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetDOI, self).__post_init__() + self.doi = self.cleanup(self.doi) + + def validate(self, submission: Submission) -> None: + """Validate the DOI value.""" + if submission.status == Submission.SUBMITTED \ + and not submission.is_announced: + raise InvalidEvent(self, 'Cannot edit a finalized submission') + if not self.doi: # Can be blank. + return + for value in re.split('[;,]', self.doi): + if not self._valid_doi(value.strip()): + raise InvalidEvent(self, f"Invalid DOI: {value}") + + def project(self, submission: Submission) -> Submission: + """Update the doi on a :class:`.domain.submission.Submission`.""" + submission.metadata.doi = self.doi + return submission + + def _valid_doi(self, value: str) -> bool: + if re.match(r"^10\.\d{4,5}\/\S+$", value): + return True + return False + + @staticmethod + def cleanup(value: str) -> str: + """Perform some light tidying on the title.""" + value = re.sub(r"\s+", " ", value).strip() # Single spaces only. + return value + + +@dataclass() +class SetMSCClassification(Event): + """Update the MSC classification codes of a submission.""" + + NAME = "update MSC classification" + NAMED = "MSC classification updated" + + msc_class: str = field(default='') + + MAX_LENGTH = 160 + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetMSCClassification, self).__post_init__() + self.msc_class = self.cleanup(self.msc_class) + + def validate(self, submission: Submission) -> None: + """Validate the MSC classification value.""" + validators.submission_is_not_finalized(self, submission) + if not self.msc_class: # Blank values are OK. 
+ return + + def project(self, submission: Submission) -> Submission: + """Update the MSC classification on a :class:`.domain.submission.Submission`.""" + submission.metadata.msc_class = self.msc_class + return submission + + @staticmethod + def cleanup(value: str) -> str: + """Perform some light fixes on the MSC classification value.""" + value = re.sub(r"\s+", " ", value).strip() + value = re.sub(r"\s*\.[\s.]*$", "", value) + value = value.replace(";", ",") # No semicolons, should be comma. + value = re.sub(r"\s*,\s*", ", ", value) # Want: comma, space. + value = re.sub(r"^MSC([\s:\-]{0,4}(classification|class|number))?" + r"([\s:\-]{0,4}\(?2000\)?)?[\s:\-]*", + "", value, flags=re.I) + return value + + +@dataclass() +class SetACMClassification(Event): + """Update the ACM classification codes of a submission.""" + + NAME = "update ACM classification" + NAMED = "ACM classification updated" + + acm_class: str = field(default='') + """E.g. F.2.2; I.2.7""" + + MAX_LENGTH = 160 + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetACMClassification, self).__post_init__() + self.acm_class = self.cleanup(self.acm_class) + + def validate(self, submission: Submission) -> None: + """Validate the ACM classification value.""" + validators.submission_is_not_finalized(self, submission) + if not self.acm_class: # Blank values are OK. 
+ return + self._valid_acm_class(submission) + + def project(self, submission: Submission) -> Submission: + """Update the ACM classification on a :class:`.domain.submission.Submission`.""" + submission.metadata.acm_class = self.acm_class + return submission + + def _valid_acm_class(self, submission: Submission) -> None: + """Check that the value is a valid ACM class.""" + ptn = r"^[A-K]\.[0-9m](\.(\d{1,2}|m)(\.[a-o])?)?$" + for acm_class in self.acm_class.split(';'): + if not re.match(ptn, acm_class.strip()): + raise InvalidEvent(self, f"Not a valid ACM class: {acm_class}") + + @staticmethod + def cleanup(value: str) -> str: + """Perform light cleanup.""" + value = re.sub(r"\s+", " ", value).strip() + value = re.sub(r"\s*\.[\s.]*$", "", value) + value = re.sub(r"^ACM-class:\s+", "", value, flags=re.I) + value = value.replace(",", ";") + _value = [] + for v in value.split(';'): + v = v.strip().upper().rstrip('.') + v = re.sub(r"^([A-K])(\d)", "\g<1>.\g<2>", v) + v = re.sub(r"M$", "m", v) + _value.append(v) + value = "; ".join(_value) + return value + + +@dataclass() +class SetJournalReference(Event): + """Update the journal reference of a submission.""" + + NAME = "add a journal reference" + NAMED = "journal reference added" + + journal_ref: str = field(default='') + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetJournalReference, self).__post_init__() + self.journal_ref = self.cleanup(self.journal_ref) + + def validate(self, submission: Submission) -> None: + """Validate the journal reference value.""" + if not self.journal_ref: # Blank values are OK. 
+ return + self._no_disallowed_words(submission) + self._contains_valid_year(submission) + + def project(self, submission: Submission) -> Submission: + """Update the journal reference on a :class:`.domain.submission.Submission`.""" + submission.metadata.journal_ref = self.journal_ref + return submission + + def _no_disallowed_words(self, submission: Submission) -> None: + """Certain words are not permitted.""" + for word in ['submit', 'in press', 'appear', 'accept', 'to be publ']: + if word in self.journal_ref.lower(): + raise InvalidEvent(self, + f"The word '{word}' should appear in the" + f" comments, not the Journal ref") + + def _contains_valid_year(self, submission: Submission) -> None: + """Must contain a valid year.""" + if not re.search(r"(\A|\D)(19|20)\d\d(\D|\Z)", self.journal_ref): + raise InvalidEvent(self, "Journal reference must include a year") + + @staticmethod + def cleanup(value: str) -> str: + """Perform light cleanup.""" + value = value.replace('PHYSICAL REVIEW LETTERS', + 'Physical Review Letters') + value = value.replace('PHYSICAL REVIEW', 'Physical Review') + value = value.replace('OPTICS LETTERS', 'Optics Letters') + return value + + +@dataclass() +class SetReportNumber(Event): + """Update the report number of a submission.""" + + NAME = "update report number" + NAMED = "report number updated" + + report_num: str = field(default='') + + def __post_init__(self): + """Perform some light cleanup on the provided value.""" + super(SetReportNumber, self).__post_init__() + self.report_num = self.cleanup(self.report_num) + + def validate(self, submission: Submission) -> None: + """Validate the report number value.""" + if not self.report_num: # Blank values are OK. 
@staticmethod
def cleanup(value: str) -> str:
    """
    Tidy a report number.

    Collapses every run of whitespace to a single space, trims both ends,
    and then removes any trailing periods (with the whitespace around them).

    Parameters
    ----------
    value : str
        Report number as provided by the user.

    Returns
    -------
    str
        The tidied report number.

    """
    collapsed = re.sub(r"\s+", " ", value).strip()
    return re.sub(r"\s*\.[\s.]*$", "", collapsed)
@dataclass()
class SetAuthors(Event):
    """
    Replace the authors of a :class:`.domain.submission.Submission`.

    Both the structured author list and the display string are written to
    the submission metadata. When no display string is given, one is built
    from the ``display`` attribute of each author.
    """

    NAME = "update authors"
    NAMED = "authors updated"

    authors: List[Author] = field(default_factory=list)
    # The authors string may be provided; otherwise it is generated in
    # ``__post_init__``.
    authors_display: Optional[str] = field(default=None)

    def __post_init__(self):
        """Hydrate author dicts, then generate and/or clean the display string."""
        super(SetAuthors, self).__post_init__()
        hydrated = []
        for entry in self.authors:
            # Plain dicts are turned into Author instances; anything else is
            # kept as-is.
            if type(entry) is dict:
                entry = Author(**entry)
            hydrated.append(entry)
        self.authors = hydrated

        display = self.authors_display
        if not display:
            display = self._canonical_author_string()
        self.authors_display = self.cleanup(display)

    @staticmethod
    def cleanup(s: str) -> str:
        """Apply light tidying to an author display string."""
        substitutions = (
            (r"\s+", " "),                  # Single spaces only.
            (r",(\s*,)+", ","),             # Remove double commas.
            (r"(\w)\(", r"\g<1> ("),        # Space before an opening paren.
            (r"\)(\w)", r") \g<1>"),        # Space after a closing paren.
            (r"\bA(?i:ND)\b", "and"),       # `And`/`AND` -> `and`.
        )
        for pattern, replacement in substitutions:
            s = re.sub(pattern, replacement, s)
        # Finally drop leading and trailing whitespace.
        return s.strip()

    def _canonical_author_string(self) -> str:
        """Join the authors' display names with a comma and a space."""
        return ", ".join(author.display for author in self.authors)

    def _does_not_contain_et_al(self) -> None:
        """Reject a display string that contains `et al`."""
        if not self.authors_display:
            return
        if re.search(r"et al\.?($|\s*\()", self.authors_display):
            raise InvalidEvent(self, "Authors should not contain et al.")

    def validate(self, submission: Submission) -> None:
        """Require an unfinalized submission and an acceptable display string."""
        validators.submission_is_not_finalized(self, submission)
        self._does_not_contain_et_al()

    def project(self, submission: Submission) -> Submission:
        """Write the authors and display string to the submission metadata."""
        metadata = submission.metadata
        metadata.authors = self.authors
        metadata.authors_display = self.authors_display
        return submission
@dataclass()
class UpdateUploadPackage(Event):
    """
    Update the upload workspace on this submission.

    Only the checksum, sizes, and source format of the existing
    :class:`.SubmissionContent` are changed; the submission must therefore
    already have an upload package.
    """

    NAME = "update the upload package"
    NAMED = "upload package updated"

    checksum: str = field(default_factory=str)
    uncompressed_size: int = field(default=0)
    compressed_size: int = field(default=0)
    source_format: SubmissionContent.Format = \
        field(default=SubmissionContent.Format.UNKNOWN)

    def __post_init__(self) -> None:
        """Make sure that `source_format` is an enum instance."""
        super(UpdateUploadPackage, self).__post_init__()
        if isinstance(self.source_format, str):
            self.source_format = SubmissionContent.Format(self.source_format)

    def validate(self, submission: Submission) -> None:
        """
        Validate data for :class:`.UpdateUploadPackage`.

        Raises
        ------
        :class:`.InvalidEvent`
            If the submission is finalized, or has no upload package to
            update.

        """
        validators.submission_is_not_finalized(self, submission)
        # ``project`` mutates the existing content object, so it must exist;
        # without this check the failure is an AttributeError in ``project``.
        if submission.source_content is None:
            raise InvalidEvent(self, 'No upload package to update')

    def project(self, submission: Submission) -> Submission:
        """Update :class:`.SubmissionContent` metadata on the submission."""
        content = submission.source_content
        content.source_format = self.source_format
        content.checksum = self.checksum
        content.uncompressed_size = self.uncompressed_size
        content.compressed_size = self.compressed_size
        # The content changed, so the preview has to be confirmed again.
        submission.submitter_confirmed_preview = False
        return submission
"""Confirm that the submitter successfully compiled a preview.""" + + NAME = "confirm submission preview is compiled" + NAMED = "confirmed that submission preview was compiled" + + def validate(self, submission: Submission) -> None: + return + + def project(self, submission: Submission) -> Submission: + """Set :attr:`Submission.submitter_compiled_preview`.""" + submission.submitter_compiled_preview = True + return submission + + +@dataclass() +class UnConfirmCompiledPreview(Event): + """Unconfirm that the submitter successfully compiled a preview.""" + + NAME = "unconfirm submission preview is compiled" + NAMED = "unconfirmed that submission preview was compiled" + + def validate(self, submission: Submission) -> None: + return + + def project(self, submission: Submission) -> Submission: + """Set :attr:`Submission.submitter_compiled_preview`.""" + submission.submitter_compiled_preview = False + return submission + + +@dataclass() +class ConfirmPreview(Event): + """Confirm that the paper and abstract previews are acceptable.""" + + NAME = "approve submission preview" + NAMED = "submission preview approved" + + def validate(self, submission: Submission) -> None: + """Validate data for :class:`.ConfirmPreview`.""" + validators.submission_is_not_finalized(self, submission) + + def project(self, submission: Submission) -> Submission: + """Set :attr:`Submission.submitter_confirmed_preview`.""" + submission.submitter_confirmed_preview = True + return submission + + +@dataclass(init=False) +class FinalizeSubmission(Event): + """Send the submission to the queue for announcement.""" + + NAME = "finalize submission for announcement" + NAMED = "submission finalized" + + REQUIRED = [ + 'creator', 'primary_classification', 'submitter_contact_verified', + 'submitter_accepts_policy', 'license', 'source_content', 'metadata', + ] + REQUIRED_METADATA = ['title', 'abstract', 'authors_display'] + + def validate(self, submission: Submission) -> None: + """Ensure that all required 
@dataclass()
class UnFinalizeSubmission(Event):
    """
    Re-open a finalized submission for modification.

    The submission must be finalized and must not have been announced.
    """

    NAME = "re-open submission for modification"
    NAMED = "submission re-opened for modification"

    def _must_be_finalized(self, submission: Submission) -> None:
        """Raise :class:`.InvalidEvent` unless the submission is finalized."""
        if submission.is_finalized:
            return
        raise InvalidEvent(self, "Submission is not finalized")

    def validate(self, submission: Submission) -> None:
        """Check that the submission is finalized and not yet announced."""
        self._must_be_finalized(submission)
        if not submission.is_announced:
            return
        raise InvalidEvent(self, "Cannot unfinalize an announced paper")

    def project(self, submission: Submission) -> Submission:
        """Return the submission to the working state and clear its submit time."""
        submission.submitted = None
        submission.status = Submission.WORKING
        return submission
When we're using this to perform publish in NG, we will want to + # re-enable this step. + # + # if not submission.status == Submission.SUBMITTED: + # raise InvalidEvent(self, + # "Can't publish in state %s" % submission.status) + # if self.arxiv_id is None: + # raise InvalidEvent(self, "Must provide an arXiv ID.") + # try: + # arxiv_identifier.parse_arxiv_id(self.arxiv_id) + # except ValueError: + # raise InvalidEvent(self, "Not a valid arXiv ID.") + + def project(self, submission: Submission) -> Submission: + """Set the arXiv ID on the submission.""" + submission.arxiv_id = self.arxiv_id + submission.status = Submission.ANNOUNCED + submission.versions.append(copy.deepcopy(submission)) + return submission + + +# Moderation-related events. + + +# @dataclass() +# class CreateComment(Event): +# """Creation of a :class:`.Comment` on a :class:`.domain.submission.Submission`.""" +# +# read_scope = 'submission:moderate' +# write_scope = 'submission:moderate' +# +# body: str = field(default_factory=str) +# scope: str = 'private' +# +# def validate(self, submission: Submission) -> None: +# """The :attr:`.body` should be set.""" +# if not self.body: +# raise ValueError('Comment body not set') +# +# def project(self, submission: Submission) -> Submission: +# """Create a new :class:`.Comment` and attach it to the submission.""" +# submission.comments[self.event_id] = Comment( +# event_id=self.event_id, +# creator=self.creator, +# created=self.created, +# proxy=self.proxy, +# submission=submission, +# body=self.body +# ) +# return submission +# +# +# @dataclass() +# class DeleteComment(Event): +# """Deletion of a :class:`.Comment` on a :class:`.domain.submission.Submission`.""" +# +# read_scope = 'submission:moderate' +# write_scope = 'submission:moderate' +# +# comment_id: str = field(default_factory=str) +# +# def validate(self, submission: Submission) -> None: +# """The :attr:`.comment_id` must present on the submission.""" +# if self.comment_id is None: +# raise 
InvalidEvent(self, 'comment_id is required') +# if not hasattr(submission, 'comments') or not submission.comments: +# raise InvalidEvent(self, 'Cannot delete nonexistant comment') +# if self.comment_id not in submission.comments: +# raise InvalidEvent(self, 'Cannot delete nonexistant comment') +# +# def project(self, submission: Submission) -> Submission: +# """Remove the comment from the submission.""" +# del submission.comments[self.comment_id] +# return submission +# +# +# @dataclass() +# class AddDelegate(Event): +# """Owner delegates authority to another agent.""" +# +# delegate: Optional[Agent] = None +# +# def validate(self, submission: Submission) -> None: +# """The event creator must be the owner of the submission.""" +# if not self.creator == submission.owner: +# raise InvalidEvent(self, 'Event creator must be submission owner') +# +# def project(self, submission: Submission) -> Submission: +# """Add the delegate to the submission.""" +# delegation = Delegation( +# creator=self.creator, +# delegate=self.delegate, +# created=self.created +# ) +# submission.delegations[delegation.delegation_id] = delegation +# return submission +# +# +# @dataclass() +# class RemoveDelegate(Event): +# """Owner revokes authority from another agent.""" +# +# delegation_id: str = field(default_factory=str) +# +# def validate(self, submission: Submission) -> None: +# """The event creator must be the owner of the submission.""" +# if not self.creator == submission.owner: +# raise InvalidEvent(self, 'Event creator must be submission owner') +# +# def project(self, submission: Submission) -> Submission: +# """Remove the delegate from the submission.""" +# if self.delegation_id in submission.delegations: +# del submission.delegations[self.delegation_id] +# return submission + + +@dataclass() +class AddFeature(Event): + """Add feature metadata to a submission.""" + + NAME = "add feature metadata" + NAMED = "feature metadata added" + + feature_type: Feature.Type = \ + 
@dataclass()
class AddClassifierResults(Event):
    """
    Add the results of a classifier to a submission.

    The results are stored as a :class:`ClassifierResults` annotation, keyed
    by the ID of this event.
    """

    NAME = "add classifier results"
    NAMED = "classifier results added"

    classifier: ClassifierResults.Classifiers \
        = field(default=ClassifierResults.Classifiers.CLASSIC)
    results: List[ClassifierResult] = field(default_factory=list)

    def validate(self, submission: Submission) -> None:
        """
        Verify that the classifier is a known value.

        Raises
        ------
        :class:`.InvalidEvent`
            If :attr:`.classifier` is not a member of
            ``ClassifierResults.Classifiers``.

        """
        if self.classifier not in ClassifierResults.Classifiers:
            valid = ", ".join([c.value for c in ClassifierResults.Classifiers])
            raise InvalidEvent(self, "Must be one of %s" % valid)

    def project(self, submission: Submission) -> Submission:
        """Add the annotation to the submission."""
        submission.annotations[self.event_id] = ClassifierResults(
            event_id=self.event_id,
            creator=self.creator,
            created=self.created,
            proxy=self.proxy,
            classifier=self.classifier,
            results=self.results
        )
        return submission
category.""" + validators.must_be_a_valid_category(self, self.category, submission) + self._must_be_unannounced(submission) + validators.cannot_be_secondary(self, self.category, submission) + + def _must_be_unannounced(self, submission: Submission) -> None: + """Can only be set on the first version before publication.""" + if submission.arxiv_id is not None or submission.version > 1: + raise InvalidEvent(self, "Can only be set on the first version," + " before publication.") + + def project(self, submission: Submission) -> Submission: + """Set :attr:`.domain.Submission.primary_classification`.""" + clsn = Classification(category=self.category) + submission.primary_classification = clsn + return submission diff --git a/core/arxiv/submission/domain/event/base.py b/core/arxiv/submission/domain/event/base.py new file mode 100644 index 0000000..6316cd2 --- /dev/null +++ b/core/arxiv/submission/domain/event/base.py @@ -0,0 +1,331 @@ +"""Provides the base event class.""" + +from typing import Optional, Callable, Tuple, Iterable, List, ClassVar, Mapping +from collections import defaultdict +from datetime import datetime +import hashlib +import copy +from pytz import UTC +from functools import wraps +from flask import current_app +from dataclasses import field, asdict +from .util import dataclass + +from arxiv.base import logging +from arxiv.base.globals import get_application_config + +from ..agent import Agent, System, agent_factory +from ...exceptions import InvalidEvent +from ..util import get_tzaware_utc_now +from ..submission import Submission +from .versioning import EventData, map_to_current_version + +logger = logging.getLogger(__name__) +logger.propagate = False + +Events = Iterable['Event'] +Condition = Callable[['Event', Submission, Submission], bool] +Callback = Callable[['Event', Submission, Submission, Agent], Events] +Decorator = Callable[[Callable], Callable] +Rule = Tuple[Condition, Callback] +Store = Callable[['Event', Submission, Submission], 
Tuple['Event', Submission]] + + +class EventType(type): + """Metaclass for :class:`.Event`\.""" + + +@dataclass() +class Event(metaclass=EventType): + """ + Base class for submission-related events/commands. + + An event represents a change to a :class:`.domain.submission.Submission`. + Rather than changing submissions directly, an application should create + (and store) events. Each event class must inherit from this base class, + extend it with whatever data is needed for the event, and define methods + for validation and projection (changing a submission): + + - ``validate(self, submission: Submission) -> None`` should raise + :class:`.InvalidEvent` if the event instance has invalid data. + - ``project(self, submission: Submission) -> Submission`` should perform + changes to the :class:`.domain.submission.Submission` and return it. + + An event class also provides a hook for doing things automatically when the + submission changes. To register a function that gets called when an event + is committed, use the :func:`bind` method. + """ + + creator: Agent + """ + The agent responsible for the operation represented by this event. + + This is **not** necessarily the creator of the submission. + """ + + created: Optional[datetime] = field(default=None) # get_tzaware_utc_now + """The timestamp when the event was originally committed.""" + + proxy: Optional[Agent] = field(default=None) + """ + The agent who facilitated the operation on behalf of the :attr:`.creator`. + + This may be an API client, or another user who has been designated as a + proxy. Note that proxy implies that the creator was not directly involved. + """ + + client: Optional[Agent] = field(default=None) + """ + The client through which the :attr:`.creator` performed the operation. + + If the creator was directly involved in the operation, this property should + be the client that facilitated the operation. 
+ """ + + submission_id: Optional[int] = field(default=None) + """ + The primary identifier of the submission being operated upon. + + This is defined as optional to support creation events, and to facilitate + chaining of events with creation events in the same transaction. + """ + + committed: bool = field(default=False) + """ + Indicates whether the event has been committed to the database. + + This should generally not be set from outside this package. + """ + + before: Optional[Submission] = None + """The state of the submission prior to the event.""" + + after: Optional[Submission] = None + """The state of the submission after the event.""" + + event_type: str = field(default_factory=str) + event_version: str = field(default_factory=str) + + _hooks: ClassVar[Mapping[type, List[Rule]]] = defaultdict(list) + + def __post_init__(self): + """Make sure data look right.""" + self.event_type = self.get_event_type() + self.event_version = self.get_event_version() + if self.client and type(self.client) is dict: + self.client = agent_factory(**self.client) + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + if self.before and type(self.before) is dict: + self.before = Submission(**self.before) + if self.after and type(self.after) is dict: + self.after = Submission(**self.after) + + @staticmethod + def get_event_version() -> str: + return get_application_config().get('CORE_VERSION', '0.0.0') + + @classmethod + def get_event_type(cls) -> str: + """Get the name of the event type.""" + return cls.__name__ + + @property + def event_id(self) -> str: + """Unique ID for this event.""" + if not self.created: + raise RuntimeError('Event not yet committed') + return self.get_id(self.created, self.event_type, self.creator) + + @staticmethod + def get_id(created: datetime, event_type: str, creator: Agent) -> str: + h = hashlib.new('sha1') + 
def apply(self, submission: Optional[Submission] = None) -> Submission:
    """
    Validate this event and project it onto a copy of the submission.

    A deep copy of ``submission`` is kept on :attr:`.before`, and the
    projected result on :attr:`.after`. The result's ``updated`` timestamp
    is set to :attr:`.created`, and the submission ID is copied between the
    event and the result when only one of them has it.

    Parameters
    ----------
    submission : :class:`.Submission` or None
        Current state of the submission. When ``None``, ``project`` is
        called without arguments.

    Returns
    -------
    :class:`.Submission`
        The state of the submission after the event.

    """
    self.before = copy.deepcopy(submission)
    self.validate(submission)
    if submission is None:
        logger.debug('Submission is None; project without submission.')
        projected = self.project()
    else:
        # Project onto a copy, so that the caller's object is not mutated.
        projected = self.project(copy.deepcopy(submission))
    self.after = projected
    projected.updated = self.created

    # Make sure that the submission has its own ID, if we know what it is,
    # and that the event learns the ID if only the submission has it.
    if projected.submission_id is None:
        if self.submission_id is not None:
            projected.submission_id = self.submission_id
    elif self.submission_id is None:
        self.submission_id = projected.submission_id
    return projected
You can pass a custom + condition to the decorator, for example: + + .. code-block:: python + + def jill_created_an_event(event: MyEvent, before: Submission, + after: Submission) -> bool: + return event.creator.username == 'jill' + + + @MyEvent.bind(jill_created_an_event) + def say_hi(event: MyEvent, before: Submission, + after: Submission, creator: Agent) -> Iterable[Event]: + yield SomeOtherEvent(...) + + Note that the condition signature is ``(event: MyEvent, before: + Submission, after: Submission) -> bool``\. + + Parameters + ---------- + condition : Callable + A callable with the signature ``(event: Event, before: Submission, + after: Submission) -> bool``. If this callable returns ``True``, + the callback will be triggered when the event to which it is bound + is saved. The default condition is that the event was not created + by :class:`System` + + Returns + ------- + Callable + Decorator for a callback function, with signature ``(event: Event, + before: Submission, after: Submission, creator: Agent = + System(...)) -> Iterable[Event]``. + + """ + if condition is None: + def _creator_is_not_system(e: Event, *args, **kwargs) -> bool: + return type(e.creator) is not System + condition = _creator_is_not_system + + def decorator(func: Callback) -> Callback: + """Register a callback for an event type and condition.""" + name = f'{cls.__name__}::{func.__module__}.{func.__name__}' + sys = System(name) + setattr(func, '__name__', name) + + @wraps(func) + def do(event: Event, before: Submission, after: Submission, + creator: Agent = sys, **kwargs) -> Iterable['Event']: + """Perform the callback. 
def _get_callbacks(self) -> List[Tuple[Condition, Callback]]:
    """
    Collect the (condition, callback) rules that apply to this event.

    Rules registered on every class in this event's MRO are included, with
    those of the most generic class first and those of the event's own
    class last.

    Returns
    -------
    list
        Items are ``(condition, callback)`` tuples. A real list is returned
        (rather than a one-shot generator) so that the result matches the
        declared return type and can be iterated more than once.

    """
    return [(condition, callback)
            for klass in reversed(type(self).__mro__)
            for condition, callback in self._hooks[klass]]
+ + """ + _, after = store(self, self.before, self.after) + self.committed = True + if not self._should_apply_callbacks(): + return self.after, [] + consequences: List[Event] = [] + for condition, callback in self._get_callbacks(): + if condition(self, self.before, self.after): + for consequence in callback(self, self.before, self.after): + consequence.created = datetime.now(UTC) + self.after = consequence.apply(self.after) + consequences.append(consequence) + self.after, addl_consequences = consequence.commit(store) + for addl in addl_consequences: + consequences.append(addl) + return self.after, consequences + + +def _get_subclasses(klass: type) -> List[type]: + _subclasses = klass.__subclasses__() + if _subclasses: + return _subclasses + [sub for klass in _subclasses + for sub in _get_subclasses(klass)] + return _subclasses + + +def event_factory(**data: EventData) -> Event: + """ + Generate an :class:`Event` instance from raw :const:`EventData`. + + Parameters + ---------- + event_type : str + Should be the name of a :class:`.Event` subclass. + data : kwargs + Keyword parameters passed to the event constructor. + + Returns + ------- + :class:`.Event` + An instance of an :class:`.Event` subclass. 
+ + """ + etypes = {klas.get_event_type(): klas for klas in _get_subclasses(Event)} + data = map_to_current_version(data) + event_type = data.pop("event_type") + event_version = data.pop("event_version") + logger.debug('Create %s with data version %s', event_type, event_version) + if 'created' not in data: + data['created'] = datetime.now(UTC) + if event_type in etypes: + return etypes[event_type](**data) + raise RuntimeError('Unknown event type: %s' % event_type) diff --git a/core/arxiv/submission/domain/event/flag.py b/core/arxiv/submission/domain/event/flag.py new file mode 100644 index 0000000..c728c2b --- /dev/null +++ b/core/arxiv/submission/domain/event/flag.py @@ -0,0 +1,249 @@ +"""Events/commands related to quality assurance.""" + +from typing import Optional, Union + +from dataclasses import field + +from .util import dataclass +from .base import Event +from ..flag import Flag, ContentFlag, MetadataFlag, UserFlag +from ..submission import Submission, SubmissionMetadata, Hold, Waiver +from ...exceptions import InvalidEvent + + +@dataclass() +class AddFlag(Event): + """Base class for flag events; not for direct use.""" + + NAME = "add flag" + NAMED = "flag added" + + flag_data: Optional[Union[int, str, float, dict, list]] \ + = field(default=None) + comment: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Not implemented.""" + raise NotImplementedError("Invoke a child event instead") + + def project(self, submission: Submission) -> Submission: + """Not implemented.""" + raise NotImplementedError("Invoke a child event instead") + + +@dataclass() +class RemoveFlag(Event): + """Remove a :class:`.domain.Flag` from a submission.""" + + NAME = "remove flag" + NAMED = "flag removed" + + flag_id: Optional[str] = field(default=None) + """This is the ``event_id`` of the event that added the flag.""" + + def validate(self, submission: Submission) -> None: + """Verify that the flag exists.""" + if self.flag_id not in 
@dataclass()
class AddContentFlag(AddFlag):
    """
    Flag the content of a submission.

    Projects a :class:`.domain.ContentFlag` onto the submission, keyed by
    the ID of this event.
    """

    NAME = "add content flag"
    NAMED = "content flag added"

    flag_type: Optional[ContentFlag.Type] = None

    def __post_init__(self) -> None:
        """Coerce a string `flag_type` to its enum member, then defer to the parent."""
        if type(self.flag_type) is str:
            self.flag_type = ContentFlag.Type(self.flag_type)
        super(AddContentFlag, self).__post_init__()

    def validate(self, submission: Submission) -> None:
        """Raise :class:`.InvalidEvent` unless the flag type is known."""
        if self.flag_type in ContentFlag.Type:
            return
        raise InvalidEvent(self, f"Unknown content flag: {self.flag_type}")

    def project(self, submission: Submission) -> Submission:
        """Attach the content flag to the submission."""
        flag_id = self.event_id
        flag = ContentFlag(
            event_id=flag_id,
            created=self.created,
            creator=self.creator,
            proxy=self.proxy,
            flag_type=self.flag_type,
            flag_data=self.flag_data,
            comment=self.comment
        )
        submission.flags[flag_id] = flag
        return submission
Submission: + """Add the flag to the submission.""" + submission.flags[self.event_id] = MetadataFlag( + event_id=self.event_id, + created=self.created, + creator=self.creator, + proxy=self.proxy, + flag_type=self.flag_type, + flag_data=self.flag_data, + comment=self.comment, + field=self.field + ) + return submission + + def __post_init__(self) -> None: + """Make sure that `flag_type` is an enum instance.""" + if type(self.flag_type) is str: + self.flag_type = MetadataFlag.Type(self.flag_type) + super(AddMetadataFlag, self).__post_init__() + + +@dataclass() +class AddUserFlag(AddFlag): + """Add a :class:`.domain.UserFlag` related to the submitter.""" + + NAME = "add user flag" + NAMED = "user flag added" + + flag_type: Optional[UserFlag.Type] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Verify that ``flag_type`` is a known :class:`.UserFlag` type.""" + if self.flag_type not in UserFlag.Type: + raise InvalidEvent(self, f"Unknown user flag: {self.flag_type}") + + def project(self, submission: Submission) -> Submission: + """Add the flag to the submission.""" + submission.flags[self.event_id] = UserFlag( + event_id=self.event_id, + created=self.created, + creator=self.creator, + flag_type=self.flag_type, + flag_data=self.flag_data, + comment=self.comment + ) + return submission + + def __post_init__(self) -> None: + """Make sure that `flag_type` is an enum instance.""" + if type(self.flag_type) is str: + self.flag_type = UserFlag.Type(self.flag_type) + super(AddUserFlag, self).__post_init__() + + +@dataclass() +class AddHold(Event): + """Add a :class:`.Hold` to a :class:`.Submission`.""" + + NAME = "add hold" + NAMED = "hold added" + + hold_type: Hold.Type = field(default=Hold.Type.PATCH) + hold_reason: Optional[str] = field(default_factory=str) + + def validate(self, submission: Submission) -> None: + pass + + def project(self, submission: Submission) -> Submission: + """Add the hold to the submission.""" + submission.holds[self.event_id] = Hold( +
event_id=self.event_id, + created=self.created, + creator=self.creator, + hold_type=self.hold_type, + hold_reason=self.hold_reason + ) + # submission.status = Submission.ON_HOLD + return submission + + def __post_init__(self) -> None: + """Make sure that `hold_type` is an enum instance.""" + if type(self.hold_type) is str: + self.hold_type = Hold.Type(self.hold_type) + super(AddHold, self).__post_init__() + + +@dataclass() +class RemoveHold(Event): + """Remove a :class:`.Hold` from a :class:`.Submission`.""" + + NAME = "remove hold" + NAMED = "hold removed" + + hold_event_id: str = field(default_factory=str) + hold_type: Hold.Type = field(default=Hold.Type.PATCH) + removal_reason: Optional[str] = field(default_factory=str) + + def validate(self, submission: Submission) -> None: + if self.hold_event_id not in submission.holds: + raise InvalidEvent(self, "No such hold") + + def project(self, submission: Submission) -> Submission: + """Remove the hold from the submission.""" + submission.holds.pop(self.hold_event_id) + # submission.status = Submission.SUBMITTED + return submission + + def __post_init__(self) -> None: + """Make sure that `hold_type` is an enum instance.""" + if type(self.hold_type) is str: + self.hold_type = Hold.Type(self.hold_type) + super(RemoveHold, self).__post_init__() + + +@dataclass() +class AddWaiver(Event): + """Add a :class:`.Waiver` to a :class:`.Submission`.""" + + NAME = "add waiver" + NAMED = "waiver added" + + waiver_type: Hold.Type = field(default=Hold.Type.SOURCE_OVERSIZE) + waiver_reason: str = field(default_factory=str) + + def validate(self, submission: Submission) -> None: + pass + + def project(self, submission: Submission) -> Submission: + """Add the :class:`.Waiver` to the :class:`.Submission`.""" + submission.waivers[self.event_id] = Waiver( + event_id=self.event_id, + created=self.created, + creator=self.creator, + waiver_type=self.waiver_type, + waiver_reason=self.waiver_reason + ) + return submission + + def 
__post_init__(self) -> None: + """Make sure that `waiver_type` is an enum instance.""" + if type(self.waiver_type) is str: + self.waiver_type = Hold.Type(self.waiver_type) + super(AddWaiver, self).__post_init__() diff --git a/core/arxiv/submission/domain/event/process.py b/core/arxiv/submission/domain/event/process.py new file mode 100644 index 0000000..f5c8765 --- /dev/null +++ b/core/arxiv/submission/domain/event/process.py @@ -0,0 +1,49 @@ +"""Events related to external or long-running processes.""" + +from typing import Optional + +from dataclasses import field + +from ...exceptions import InvalidEvent +from ..submission import Submission +from ..process import ProcessStatus +from .base import Event +from .util import dataclass + + +@dataclass() +class AddProcessStatus(Event): + """Add the status of an external/long-running process to a submission.""" + + NAME = "add status of a process" + NAMED = "added status of a process" + + Status = ProcessStatus.Status + + process_id: Optional[str] = field(default=None) + process: Optional[str] = field(default=None) + step: Optional[str] = field(default=None) + status: Status = field(default=Status.PENDING) + reason: Optional[str] = field(default=None) + + def __post_init__(self) -> None: + """Make sure our enums are in order.""" + super(AddProcessStatus, self).__post_init__() + self.status = self.Status(self.status) + + def validate(self, submission: Submission) -> None: + """Verify that we have a :class:`.ProcessStatus`.""" + if self.process is None: + raise InvalidEvent(self, "Must include process") + + def project(self, submission: Submission) -> Submission: + """Add the process status to the submission.""" + submission.processes.append(ProcessStatus( + creator=self.creator, + created=self.created, + process=self.process, + step=self.step, + status=self.status, + reason=self.reason + )) + return submission diff --git a/core/arxiv/submission/domain/event/proposal.py b/core/arxiv/submission/domain/event/proposal.py new 
file mode 100644 index 0000000..f6ae6f4 --- /dev/null +++ b/core/arxiv/submission/domain/event/proposal.py @@ -0,0 +1,140 @@ +"""Commands for working with :class:`.Proposal` instances on submissions.""" + +import hashlib +import re +import copy +from datetime import datetime +from pytz import UTC +from typing import Optional, TypeVar, List, Tuple, Any, Dict, Iterable +from urllib.parse import urlparse +from dataclasses import field, asdict +from .util import dataclass +import bleach + +from arxiv.util import schema +from arxiv import taxonomy, identifier +from arxiv.base import logging + +from ..agent import Agent +from ..submission import Submission, SubmissionMetadata, Author, \ + Classification, License, Delegation, \ + SubmissionContent, WithdrawalRequest, CrossListClassificationRequest +from ..proposal import Proposal +from ..annotation import Comment + +from ...exceptions import InvalidEvent +from ..util import get_tzaware_utc_now +from .base import Event +from .request import RequestCrossList, RequestWithdrawal, ApplyRequest, \ + RejectRequest, ApproveRequest +from . 
import validators + +logger = logging.getLogger(__name__) + + +@dataclass() +class AddProposal(Event): + """Add a new proposal to a :class:`Submission`.""" + + NAME = 'add proposal' + NAMED = 'proposal added' + + proposed_event_type: Optional[type] = field(default=None) + proposed_event_data: dict = field(default_factory=dict) + comment: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Simulate applying the proposal to check for validity.""" + if self.proposed_event_type is None: + raise InvalidEvent(self, f"Proposed event type is required") + proposed_event_data = copy.deepcopy(self.proposed_event_data) + proposed_event_data.update({'creator': self.creator}) + event = self.proposed_event_type(**proposed_event_data) + event.validate(submission) + + def project(self, submission: Submission) -> Submission: + """Add the proposal to the submission.""" + submission.proposals[self.event_id] = Proposal( + event_id=self.event_id, + creator=self.creator, + created=self.created, + proxy=self.proxy, + proposed_event_type=self.proposed_event_type, + proposed_event_data=self.proposed_event_data, + comments=[Comment(event_id=self.event_id, creator=self.creator, + created=self.created, proxy=self.proxy, + body=self.comment)], + status=Proposal.Status.PENDING + ) + return submission + + +@dataclass() +class RejectProposal(Event): + """Reject a :class:`.Proposal` on a submission.""" + + NAME = 'reject proposal' + NAMED = 'proposal rejected' + + proposal_id: Optional[str] = field(default=None) + comment: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Ensure that the proposal isn't already approved or rejected.""" + if self.proposal_id not in submission.proposals: + raise InvalidEvent(self, f"No such proposal {self.proposal_id}") + elif submission.proposals[self.proposal_id].is_rejected(): + raise InvalidEvent(self, f"{self.proposal_id} is already rejected") + elif 
submission.proposals[self.proposal_id].is_accepted(): + raise InvalidEvent(self, f"{self.proposal_id} is accepted") + + def project(self, submission: Submission) -> Submission: + """Set the status of the proposal to rejected.""" + submission.proposals[self.proposal_id].status = Proposal.REJECTED + if self.comment: + submission.proposals[self.proposal_id].comments.append( + Comment(event_id=self.event_id, creator=self.creator, + created=self.created, proxy=self.proxy, + body=self.comment)) + return submission + + +@dataclass() +class AcceptProposal(Event): + """Accept a :class:`.Proposal` on a submission.""" + + NAME = 'accept proposal' + NAMED = 'proposal accepted' + + proposal_id: Optional[str] = field(default=None) + comment: Optional[str] = field(default=None) + + def validate(self, submission: Submission) -> None: + """Ensure that the proposal isn't already approved or rejected.""" + if self.proposal_id not in submission.proposals: + raise InvalidEvent(self, f"No such proposal {self.proposal_id}") + elif submission.proposals[self.proposal_id].is_rejected(): + raise InvalidEvent(self, f"{self.proposal_id} is rejected") + elif submission.proposals[self.proposal_id].is_accepted(): + raise InvalidEvent(self, f"{self.proposal_id} is already accepted") + + def project(self, submission: Submission) -> Submission: + """Mark the proposal as accepted.""" + submission.proposals[self.proposal_id].status = Proposal.ACCEPTED + if self.comment: + submission.proposals[self.proposal_id].comments.append( + Comment(event_id=self.event_id, creator=self.creator, + created=self.created, proxy=self.proxy, + body=self.comment)) + return submission + + +@AcceptProposal.bind() +def apply_proposal(event: AcceptProposal, before: Submission, + after: Submission, creator: Agent) -> Iterable[Event]: + """Apply an accepted proposal.""" + proposal = after.proposals[event.proposal_id] + proposed_event_data = copy.deepcopy(proposal.proposed_event_data) + proposed_event_data.update({'creator': 
creator}) + event = proposal.proposed_event_type(**proposed_event_data) + yield event diff --git a/core/arxiv/submission/domain/event/request.py b/core/arxiv/submission/domain/event/request.py new file mode 100644 index 0000000..f1c4f02 --- /dev/null +++ b/core/arxiv/submission/domain/event/request.py @@ -0,0 +1,206 @@ +"""Commands/events related to user requests.""" + +from typing import Optional, List +import hashlib +from dataclasses import field +from .util import dataclass + +from arxiv import taxonomy + +from . import validators +from .base import Event +from ..submission import Submission, Classification, WithdrawalRequest, \ + CrossListClassificationRequest, UserRequest +from ...exceptions import InvalidEvent + + +@dataclass() +class ApproveRequest(Event): + """Approve a user request.""" + + NAME = "approve user request" + NAMED = "user request approved" + + request_id: Optional[str] = field(default=None) + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + if self.request_id not in submission.user_requests: + raise InvalidEvent(self, "No such request") + + def project(self, submission: Submission) -> Submission: + submission.user_requests[self.request_id].status = UserRequest.APPROVED + return submission + + +@dataclass() +class RejectRequest(Event): + NAME = "reject user request" + NAMED = "user request rejected" + + request_id: Optional[str] = field(default=None) + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + if self.request_id not in submission.user_requests: + raise InvalidEvent(self, "No such request") 
+ + def project(self, submission: Submission) -> Submission: + submission.user_requests[self.request_id].status = UserRequest.REJECTED + return submission + + +@dataclass() +class CancelRequest(Event): + NAME = "cancel user request" + NAMED = "user request cancelled" + + request_id: Optional[str] = field(default=None) + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + if self.request_id not in submission.user_requests: + raise InvalidEvent(self, "No such request") + + def project(self, submission: Submission) -> Submission: + submission.user_requests[self.request_id].status = \ + UserRequest.CANCELLED + return submission + + +@dataclass() +class ApplyRequest(Event): + NAME = "apply user request" + NAMED = "user request applied" + + request_id: Optional[str] = field(default=None) + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + if self.request_id not in submission.user_requests: + raise InvalidEvent(self, "No such request") + + def project(self, submission: Submission) -> Submission: + user_request = submission.user_requests[self.request_id] + if hasattr(user_request, 'apply'): + submission = user_request.apply(submission) + user_request.status = UserRequest.APPLIED + submission.user_requests[self.request_id] = user_request + return submission + + +@dataclass() +class RequestCrossList(Event): + """Request that a secondary classification be added after announcement.""" + + NAME = "request cross-list classification" + NAMED = "cross-list classification requested" + + categories: List[taxonomy.Category] = 
field(default_factory=list) + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + """Validate the cross-list request.""" + validators.no_active_requests(self, submission) + if not submission.is_announced: + raise InvalidEvent(self, "Submission must already be announced") + for category in self.categories: + validators.must_be_a_valid_category(self, category, submission) + validators.cannot_be_primary(self, category, submission) + validators.cannot_be_secondary(self, category, submission) + + def project(self, submission: Submission) -> Submission: + """Create a cross-list request.""" + classifications = [ + Classification(category=category) for category in self.categories + ] + + req_id = CrossListClassificationRequest.generate_request_id(submission) + user_request = CrossListClassificationRequest( + request_id=req_id, + creator=self.creator, + created=self.created, + status=WithdrawalRequest.PENDING, + classifications=classifications + ) + submission.user_requests[req_id] = user_request + return submission + + +@dataclass() +class RequestWithdrawal(Event): + """Request that a paper be withdrawn.""" + + NAME = "request withdrawal" + NAMED = "withdrawal requested" + + reason: str = field(default_factory=str) + + MAX_LENGTH = 400 + + def __hash__(self) -> int: + """Use event ID as object hash.""" + return hash(self.event_id) + + def __eq__(self, other: Event) -> bool: + """Compare this event to another event.""" + return hash(self) == hash(other) + + def validate(self, submission: Submission) -> None: + """Make sure that a reason was provided.""" + validators.no_active_requests(self, submission) + if not self.reason: + raise InvalidEvent(self, "Provide a reason for the withdrawal") + if len(self.reason) > self.MAX_LENGTH: + raise 
InvalidEvent(self, "Reason must be 400 characters or less") + if not submission.is_announced: + raise InvalidEvent(self, "Submission must already be announced") + + def project(self, submission: Submission) -> Submission: + """Update the submission status and withdrawal reason.""" + req_id = WithdrawalRequest.generate_request_id(submission) + user_request = WithdrawalRequest( + request_id=req_id, + creator=self.creator, + created=self.created, + updated=self.created, + status=WithdrawalRequest.PENDING, + reason_for_withdrawal=self.reason + ) + submission.user_requests[req_id] = user_request + return submission diff --git a/core/arxiv/submission/domain/event/tests/__init__.py b/core/arxiv/submission/domain/event/tests/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/arxiv/submission/domain/event/tests/test_abstract_cleanup.py b/core/arxiv/submission/domain/event/tests/test_abstract_cleanup.py new file mode 100644 index 0000000..1c4b922 --- /dev/null +++ b/core/arxiv/submission/domain/event/tests/test_abstract_cleanup.py @@ -0,0 +1,52 @@ +"""Test abstract cleanup""" + +from unittest import TestCase +from .. import SetAbstract +from arxiv.base.filters import abstract_lf_to_br + +class TestSetAbstractCleanup(TestCase): + """Test abstract cleanup""" + + def test_paragraph_cleanup(self): + awlb = "Paragraph 1.\n \nThis should be paragraph 2" + self.assertIn(' in') + + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n\t\nThis should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace (tab)') + + awlb = "Paragraph 1.\n \nThis should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n \t \nThis should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n \nThis should be p 2." 
+ e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n This should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n\tThis should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') + + awlb = "Paragraph 1.\n This should be p 2." + e = SetAbstract(creator='xyz', abstract=awlb) + self.assertIn(' creating whitespace') diff --git a/core/arxiv/submission/domain/event/tests/test_event_construction.py b/core/arxiv/submission/domain/event/tests/test_event_construction.py new file mode 100644 index 0000000..51de87c --- /dev/null +++ b/core/arxiv/submission/domain/event/tests/test_event_construction.py @@ -0,0 +1,45 @@ +"""Test that all event classes are well-formed.""" + +from unittest import TestCase +import inspect +from ..base import Event + + +class TestNamed(TestCase): + """Verify that all event classes are named.""" + + def test_has_name(self): + """All event classes must have a ``NAME`` attribute.""" + for klass in Event.__subclasses__(): + self.assertTrue(hasattr(klass, 'NAME'), + f'{klass.__name__} is missing attribute NAME') + + def test_has_named(self): + """All event classes must have a ``NAMED`` attribute.""" + for klass in Event.__subclasses__(): + self.assertTrue(hasattr(klass, 'NAMED'), + f'{klass.__name__} is missing attribute NAMED') + + +class TestHasProjection(TestCase): + """Verify that all event classes have a projection method.""" + + def test_has_projection(self): + """Each event class must have an instance method ``project()``.""" + for klass in Event.__subclasses__(): + self.assertTrue(hasattr(klass, 'project'), + f'{klass.__name__} is missing project() method') + self.assertTrue(inspect.isfunction(klass.project), + f'{klass.__name__} is missing project() method') + + +class TestHasValidation(TestCase): + """Verify that all event classes have a validation method.""" + +
def test_has_validate(self): + """Each event class must have an instance method ``validate()``.""" + for klass in Event.__subclasses__(): + self.assertTrue(hasattr(klass, 'validate'), + f'{klass.__name__} is missing validate() method') + self.assertTrue(inspect.isfunction(klass.validate), + f'{klass.__name__} is missing validate() method') diff --git a/core/arxiv/submission/domain/event/tests/test_hooks.py b/core/arxiv/submission/domain/event/tests/test_hooks.py new file mode 100644 index 0000000..24389f8 --- /dev/null +++ b/core/arxiv/submission/domain/event/tests/test_hooks.py @@ -0,0 +1,59 @@ +"""Test callback hook functionality on :class:`Event`.""" + +from unittest import TestCase, mock +from dataclasses import dataclass, field +from ..base import Event +from ...agent import System + + +class TestCommitEvent(TestCase): + """Tests for :func:`Event.bind` and :class:`Event.commit`.""" + + def test_commit_event(self): + """Test a simple commit hook.""" + @dataclass + class ChildEvent(Event): + def _should_apply_callbacks(self): + return True + + @dataclass + class OtherChildEvent(Event): + def _should_apply_callbacks(self): + return True + + callback = mock.MagicMock(return_value=[], __name__='test') + ChildEvent.bind(lambda *a: True)(callback) # Register callback. 
+ + save = mock.MagicMock( + return_value=(mock.MagicMock(), mock.MagicMock()) + ) + event = ChildEvent(creator=System('system')) + OtherChildEvent(creator=System('system')) + event.commit(save) + self.assertEqual(callback.call_count, 1, + "Callback is only executed on the class to which it" + " is bound") + + def test_callback_inheritance(self): + """Callback is inherited by subclasses.""" + @dataclass + class ParentEvent(Event): + def _should_apply_callbacks(self): + return True + + @dataclass + class ChildEvent(ParentEvent): + def _should_apply_callbacks(self): + return True + + callback = mock.MagicMock(return_value=[], __name__='test') + ParentEvent.bind(lambda *a: True)(callback) # Register callback. + + save = mock.MagicMock( + return_value=(mock.MagicMock(), mock.MagicMock()) + ) + event = ChildEvent(creator=System('system')) + event.commit(save) + self.assertEqual(callback.call_count, 1, + "Callback bound to parent class is called when child" + " is committed") diff --git a/core/arxiv/submission/domain/event/util.py b/core/arxiv/submission/domain/event/util.py new file mode 100644 index 0000000..8583381 --- /dev/null +++ b/core/arxiv/submission/domain/event/util.py @@ -0,0 +1,27 @@ +"""Helpers for event classes.""" + +from typing import Any + +from dataclasses import dataclass as base_dataclass + + +def event_hash(instance: Any) -> int: + """Use event ID as object hash.""" + return hash(instance.event_id) # typing: ignore + + +def event_eq(instance: Any, other: Any) -> bool: + """Compare this event to another event.""" + return hash(instance) == hash(other) + + +def dataclass(**kwargs) -> type: + def inner(cls): + if kwargs: + new_cls = base_dataclass(**kwargs)(cls) + else: + new_cls = base_dataclass(cls) + setattr(new_cls, '__hash__', event_hash) + setattr(new_cls, '__eq__', event_eq) + return new_cls + return inner diff --git a/core/arxiv/submission/domain/event/validators.py b/core/arxiv/submission/domain/event/validators.py new file mode 100644 index 
0000000..8d65cd6 --- /dev/null +++ b/core/arxiv/submission/domain/event/validators.py @@ -0,0 +1,68 @@ +"""Reusable validators for events.""" + +import re + +from arxiv import taxonomy, identifier + +from .base import Event +from ..submission import Submission +from ...exceptions import InvalidEvent + + +def submission_is_not_finalized(event: Event, submission: Submission) -> None: + """ + Verify that the submission is not finalized. + + Parameters + ---------- + event : :class:`.Event` + submission : :class:`.domain.submission.Submission` + + Raises + ------ + :class:`.InvalidEvent` + Raised if the submission is finalized. + + """ + if submission.is_finalized: + raise InvalidEvent(event, "Cannot apply to a finalized submission") + + +def no_trailing_period(event: Event, submission: Submission, + value: str) -> None: + """ + Verify that there are no trailing periods in ``value`` except ellipses. + """ + if re.search(r"(? None: + """Valid arXiv categories are defined in :mod:`arxiv.taxonomy`.""" + if not category or category not in taxonomy.CATEGORIES_ACTIVE: + raise InvalidEvent(event, "Not a valid category") + + +def cannot_be_primary(event: Event, category: str, submission: Submission) \ + -> None: + """The category can't already be set as a primary classification.""" + if submission.primary_classification is None: + return + if category == submission.primary_classification.category: + raise InvalidEvent(event, "The same category cannot be used as both" + " the primary and a secondary category.") + + +def cannot_be_secondary(event: Event, category: str, submission: Submission) \ + -> None: + """The same category cannot be added as a secondary twice.""" + if category in submission.secondary_categories: + raise InvalidEvent(event, f"Secondary {category} already set on this" + f" submission.") + + +def no_active_requests(event: Event, submission: Submission) -> None: + if submission.has_active_requests: + raise InvalidEvent(event, "Must not have active requests.") 
diff --git a/core/arxiv/submission/domain/event/versioning/__init__.py b/core/arxiv/submission/domain/event/versioning/__init__.py new file mode 100644 index 0000000..7b163ac --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/__init__.py @@ -0,0 +1,131 @@ +""" +Provides on-the-fly versioned migrations for event data. + +The purpose of this module is to facilitate backwards-compatible changes to +the structure of :class:`.domain.event.Event` classes. This problem is similar +to database migrations, except that the "meat" of the event data are dicts +stored as JSON and thus ALTER commands won't get us all that far. + +Writing version mappings +======================== +Any new version of this software that includes changes to existing +event/command classes that would break events from earlier versions **MUST** +include a version mapping module. The module should include a mapping class +(a subclass of :class:`.BaseVersionMapping`) for each event type for which +there are relevant changes. + +See :mod:`.versioning.version_0_0_0_example` for an example. + +Each such class must include an internal ``Meta`` class with its software +version and the name of the event type to which it applies. For example: + +.. code-block:: python + + from ._base import BaseVersionMapping + + class SetAbstractMigration(BaseVersionMapping): + class Meta: + event_version = "0.2.12" # Must be a semver. + event_type = "SetAbstract" + + +In addition, it's a good idea to include some test data that can be used to +verify the behavior of the migration. You can do this by adding a ``tests`` +attribute to ``Meta`` that includes tuples of the form +``(original: EventData, expected: EventData)``. For example: + + +.. code-block:: python + + from ._base import BaseVersionMapping + + class SetAbstractMigration(BaseVersionMapping): + class Meta: + event_version = "0.2.12" # Must be a semver. 
+ event_type = "SetAbstract" + tests = [({"event_version": "0.2.11", "abstract": "very abstract"}, + {"event_version": "0.2.12", "abstract": "more abstract"})] + + +Transformation logic can be implemented for individual fields, or for the event +datum as a whole. + +Transforming individual fields +------------------------------ +Transformers for individual fields may be implemented by +defining instance methods with the name ``transform_{field}`` and the signature +``(self, original: EventData, key: str, value: Any) -> Tuple[str, Any]``. +The return value is the field name and transformed value. Note that the field +name may be altered here, and the original field name will be omitted from the +final transformed representation of the event datum. + +Transforming the datum as a whole +--------------------------------- +A transformer for the datum as a whole may be implemented by defining an +instance method named ``transform`` with the signature +``(self, original: EventData, transformed: EventData) -> EventData``. This is +called **after** the transformers for individual fields; the second positional +argument is the state of the datum at that point, and the first positional +argument is the state of the datum before transformations were applied. +""" + +import copy +from ._base import EventData, BaseVersionMapping, Version + +from arxiv.base.globals import get_application_config + + +def map_to_version(original: EventData, target: str) -> EventData: + """ + Map raw event data to a later version. + + Loads all version mappings for the original event type subsequent to the + version of the software at which the data was created, up to and + including the ``target`` version. + + Parameters + ---------- + original : dict + Original event data. + target : str + The target software version. Must be a valid semantic version, i.e. + with major, minor, and patch components.
+ + Returns + ------- + dict + Data from ``original`` transformed into a representation suitable for + use in the target software version. + + """ + original_version = Version.from_event_data(original) + transformed = copy.deepcopy(original) + for mapping in BaseVersionMapping.__subclasses__(): + if original['event_type'] == mapping.Meta.event_type \ + and Version(mapping.Meta.event_version) <= Version(target) \ + and Version(mapping.Meta.event_version) > original_version: + mapper = mapping() + transformed = mapper(transformed) + return transformed + + +def map_to_current_version(original: EventData) -> EventData: + """ + Map raw event data to the current software version. + + Relies on the ``CORE_VERSION`` parameter in the application configuration. + + Parameters + ---------- + original : dict + Original event data. + + Returns + ------- + dict + Data from ``original`` transformed into a representation suitable for + use in the current software version. + + """ + current_version = get_application_config().get('CORE_VERSION', '0.0.0') + return map_to_version(original, current_version) diff --git a/core/arxiv/submission/domain/event/versioning/_base.py b/core/arxiv/submission/domain/event/versioning/_base.py new file mode 100644 index 0000000..c80f53d --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/_base.py @@ -0,0 +1,97 @@ +"""Provides :class:`.BaseVersionMapping`.""" + +from typing import Optional, Callable, Any, Tuple +from datetime import datetime +from mypy_extensions import TypedDict +import semver + + +class EventData(TypedDict): + """Raw event data from the event store, keyed as it is read below.""" + + event_version: str + created: datetime + event_type: str + + +class Version(str): + """A semantic version.""" + + @classmethod + def from_event_data(cls, data: EventData) -> 'Version': + """Create a :class:`.Version` from :class:`.EventData`.""" + return cls(data['event_version']) + + def __eq__(self, other: 'Version') -> bool: + """Equality comparison using semantic
versioning.""" + return semver.compare(self, other) == 0 + + def __lt__(self, other: 'Version') -> bool: + """Less-than comparison using semantic versioning.""" + return semver.compare(self, other) < 0 + + def __le__(self, other: 'Version') -> bool: + """Less-than-equals comparison using semantic versioning.""" + return semver.compare(self, other) <= 0 + + def __gt__(self, other: 'Version') -> bool: + """Greater-than comparison using semantic versioning.""" + return semver.compare(self, other) > 0 + + def __ge__(self, other: 'Version') -> bool: + """Greater-than-equals comparison using semantic versioning.""" + return semver.compare(self, other) >= 0 + + +FieldTransformer = Callable[[EventData, str, Any], Tuple[str, Any]] + + +class BaseVersionMapping: + """Base class for version mappings.""" + + _protected = ['event_type', 'event_version', 'created'] + + def __init__(self) -> None: + """Verify that the instance has required metadata.""" + if not hasattr(self, 'Meta'): + raise NotImplementedError('Missing `Meta` on child class') + if not hasattr(self.Meta, 'event_version'): + raise NotImplementedError('Missing version on child class') + if not hasattr(self.Meta, 'event_type'): + raise NotImplementedError('Missing event_type on child class') + + def __call__(self, original: EventData) -> EventData: + """Transform some :class:`.EventData`.""" + return self._transform(original) + + @classmethod + def test(cls) -> None: + """Perform tests on the mapping subclass.""" + try: + cls() + except NotImplementedError as e: + raise AssertionError('Not correctly implemented') from e + for original, expected in getattr(cls.Meta, 'tests', []): + assert cls()(original) == expected + try: + semver.parse_version_info(cls.Meta.event_version) + except ValueError as e: + raise AssertionError('Not a valid semantic version') from e + + def _get_field_transformer(self, field: str) -> Optional[FieldTransformer]: + """Get a transformation for a field, if it is defined.""" + return 
getattr(self, f'transform_{field}', None) + + def _transform(self, original: EventData) -> EventData: + """Perform transformation of event data.""" + transformed = {} + for key, value in original.items(): + if key not in self._protected: + field_transformer = self._get_field_transformer(key) + if field_transformer is not None: + key, value = field_transformer(original, key, value) + transformed[key] = value + if hasattr(self, 'transform'): + transformed = self.transform(original, transformed) + transformed['event_version'] = self.Meta.event_version + return transformed diff --git a/core/arxiv/submission/domain/event/versioning/tests/__init__.py b/core/arxiv/submission/domain/event/versioning/tests/__init__.py new file mode 100644 index 0000000..c041a0d --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for versioning mechanisms.""" diff --git a/core/arxiv/submission/domain/event/versioning/tests/test_example.py b/core/arxiv/submission/domain/event/versioning/tests/test_example.py new file mode 100644 index 0000000..83be437 --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/tests/test_example.py @@ -0,0 +1,15 @@ +"""Test the example version mapping module.""" + +from unittest import TestCase + +from .. import map_to_version +from .._base import BaseVersionMapping +from .. 
import version_0_0_0_example + + +class TestSetTitleExample(TestCase): + """Test the :class:`.version_0_0_0_example.SetTitleExample` mapping.""" + + def test_set_title(self): + """Execute the built-in version mapping tests.""" + version_0_0_0_example.SetTitleExample.test() diff --git a/core/arxiv/submission/domain/event/versioning/tests/test_versioning.py b/core/arxiv/submission/domain/event/versioning/tests/test_versioning.py new file mode 100644 index 0000000..e82c097 --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/tests/test_versioning.py @@ -0,0 +1,136 @@ +"""Test versioning of event data.""" + +from unittest import TestCase + +from .. import map_to_version +from .._base import BaseVersionMapping + + +class TitleIsNowCoolTitle(BaseVersionMapping): + """Changes the ``title`` field to ``cool_title``.""" + + class Meta: + """Metadata for this mapping.""" + + event_version = '0.3.5' + event_type = "SetTitle" + tests = [({'event_version': '0.1.1', 'title': 'olde'}, + {'event_version': '0.3.5', 'cool_title': 'olde'})] + + def transform_title(self, original, key, value): + """Rename the `title` field to `cool_title`.""" + return "cool_title", value + + +class TestVersionMapping(TestCase): + """Tests for :func:`.map_to_version`.""" + + def test_map_to_version(self): + """We have data from a previous version and an intermediate mapping.""" + data = { + 'event_version': '0.1.2', + 'event_type': 'SetTitle', + 'title': 'Some olde title' + } + + expected = { + 'event_version': '0.3.5', + 'event_type': 'SetTitle', + 'cool_title': 'Some olde title' + } + self.assertDictEqual(map_to_version(data, '0.4.1'), expected, + "The mapping is applied") + + def test_map_to_version_no_intermediate(self): + """We have data from a previous version and no intermediate mapping.""" + data = { + 'event_version': '0.5.5', + 'event_type': 'SetTitle', + 'cool_title': 'Some olde title' + } + self.assertDictEqual(map_to_version(data, '0.6.7'), data, + "The mapping is not applied") 
+ + def test_data_is_up_to_date(self): + """We have data that is 100% current.""" + data = { + 'event_version': '0.5.5', + 'event_type': 'SetTitle', + 'cool_title': 'Some olde title' + } + self.assertDictEqual(map_to_version(data, '0.5.5'), data, + "The mapping is not applied") + + +class TestVersionMappingTests(TestCase): + """Tests defined in metadata can be run, with the expected result.""" + + def test_test(self): + """Run tests in mapping metadata.""" + class BrokenFitleIsNowCoolTitle(BaseVersionMapping): + """A broken version mapping.""" + + class Meta: + """Metadata for this mapping.""" + + event_version = '0.3.5' + event_type = "SetFitle" + tests = [({'event_version': '0.1.1', 'title': 'olde'}, + {'event_version': '0.3.5', 'cool_title': 'olde'})] + + def transform_title(self, original, key, value): + """Rename the `title` field to `cool_title`.""" + return "fool_title", value + + TitleIsNowCoolTitle.test() + with self.assertRaises(AssertionError): + BrokenFitleIsNowCoolTitle.test() + + def test_version_is_present(self): + """Tests check that version is specified.""" + class MappingWithoutVersion(BaseVersionMapping): + """Mapping that is missing a version.""" + + class Meta: + """Metadata for this mapping.""" + + event_type = "FetBitle" + + with self.assertRaises(AssertionError): + MappingWithoutVersion.test() + + def test_event_type_is_present(self): + """Tests check that event_type is specified.""" + class MappingWithoutEventType(BaseVersionMapping): + """Mapping that is missing an event type.""" + + class Meta: + """Metadata for this mapping.""" + + event_version = "5.3.2" + + with self.assertRaises(AssertionError): + MappingWithoutEventType.test() + + def test_version_is_valid(self): + """Tests check that version is a valid semver.""" + class MappingWithInvalidVersion(BaseVersionMapping): + """Mapping that has an invalid semantic version.""" + + class Meta: + """Metadata for this mapping.""" + + event_version = "52" + event_type = "FetBitle" + + with 
self.assertRaises(AssertionError): + MappingWithInvalidVersion.test() + + +class TestVersioningModule(TestCase): + def test_loads_mappings(self): + """Loading a version mapping module installs those mappings.""" + from .. import version_0_0_0_example + self.assertIn(version_0_0_0_example.SetTitleExample, + BaseVersionMapping.__subclasses__(), + 'Mappings in an imported module are available for use') diff --git a/core/arxiv/submission/domain/event/versioning/version_0_0_0_example.py b/core/arxiv/submission/domain/event/versioning/version_0_0_0_example.py new file mode 100644 index 0000000..e4481e3 --- /dev/null +++ b/core/arxiv/submission/domain/event/versioning/version_0_0_0_example.py @@ -0,0 +1,41 @@ +""" +An example version mapping module. + +This module gathers together all event mappings for version 0.0.0. + +The mappings in this module will never be used, since there are no +data prior to version 0.0.0. +""" +from typing import Tuple +from ._base import BaseVersionMapping, EventData + +VERSION = '0.0.0' + + +class SetTitleExample(BaseVersionMapping): + """Perform no changes whatsoever to the `title` field.""" + + class Meta: + """Metadata about this mapping.""" + + event_version = VERSION + """All of the mappings in this module are for the same version.""" + + event_type = 'SetTitle' + """This mapping applies to :class:`.domain.event.SetTitle`.""" + + tests = [ + ({'event_version': '0.0.0', 'title': 'The title'}, + {'event_version': '0.0.0', 'title': 'The best title!!'}) + ] + """Expected changes to the ``title`` field.""" + + def transform_title(self, orig: EventData, key: str, val: str) \ + -> Tuple[str, str]: + """Make the title the best.""" + parts = val.split() + return key, " ".join([parts[0], "best"] + parts[1:]) + + def transform(self, orig: EventData, xf: EventData) -> EventData: + """Add some emphasis.""" + return {k: f"{v}!!" 
for k, v in xf.items() if type(v) is str} diff --git a/core/arxiv/submission/domain/flag.py b/core/arxiv/submission/domain/flag.py new file mode 100644 index 0000000..14cddd8 --- /dev/null +++ b/core/arxiv/submission/domain/flag.py @@ -0,0 +1,93 @@ +"""Data structures related to QA.""" + +from datetime import datetime +from typing import Optional, Union +from enum import Enum + +from mypy_extensions import TypedDict +from dataclasses import field, dataclass, asdict + +from .agent import Agent, agent_factory + + +PossibleDuplicate = TypedDict('PossibleDuplicate', + {'id': int, 'title': str, 'owner': Agent}) + + +@dataclass +class Flag: + """Base class for flags.""" + + event_id: str + creator: Agent + created: datetime + flag_type: str + flag_data: Optional[Union[int, str, float, dict, list]] + comment: str + proxy: Optional[Agent] = field(default=None) + flag_datatype: str = field(default_factory=str) + + def __post_init__(self): + """Set derivative fields.""" + self.flag_datatype = self.__class__.__name__ + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + + +@dataclass +class ContentFlag(Flag): + """A flag related to the content of the submission.""" + + class Type(Enum): + """Supported content flags.""" + + LOW_STOP = 'low stopwords' + """Number of stopwords is abnormally low.""" + LOW_STOP_PERCENT = 'low stopword percentage' + """Frequency of stopwords is abnormally low.""" + LANGUAGE = 'language' + """Possibly not English language.""" + CHARACTER_SET = 'character set' + """Possibly excessive use of non-ASCII characters.""" + LINE_NUMBERS = 'line numbers' + """Content has line numbers.""" + + +@dataclass +class MetadataFlag(Flag): + """A flag related to the submission metadata.""" + + field: Optional[str] = field(default=None) + + class Type(Enum): + """Supported metadata flags.""" + + POSSIBLE_DUPLICATE_TITLE = 'possible 
duplicate title' + LANGUAGE = 'language' + CHARACTER_SET = 'character_set' + + +@dataclass +class UserFlag(Flag): + """A flag related to the submitter.""" + + class Type(Enum): + """Supported user flags.""" + + RATE = 'rate' + + +flag_datatypes = { + 'ContentFlag': ContentFlag, + 'MetadataFlag': MetadataFlag, + 'UserFlag': UserFlag +} + + +def flag_factory(**data) -> Flag: + cls = flag_datatypes[data.pop('flag_datatype')] + if not isinstance(data['flag_type'], cls.Type): + data['flag_type'] = cls.Type(data['flag_type']) + return cls(**data) diff --git a/core/arxiv/submission/domain/meta.py b/core/arxiv/submission/domain/meta.py new file mode 100644 index 0000000..030ad3a --- /dev/null +++ b/core/arxiv/submission/domain/meta.py @@ -0,0 +1,20 @@ +"""Metadata objects in support of submissions.""" + +from typing import Optional, List +from arxiv.taxonomy import Category +from dataclasses import dataclass, asdict, field + + +@dataclass +class Classification: + """A classification for a :class:`.domain.submission.Submission`.""" + + category: Category + + +@dataclass +class License: + """An license for distribution of the submission.""" + + uri: str + name: Optional[str] = None diff --git a/core/arxiv/submission/domain/process.py b/core/arxiv/submission/domain/process.py new file mode 100644 index 0000000..0840f64 --- /dev/null +++ b/core/arxiv/submission/domain/process.py @@ -0,0 +1,48 @@ +"""Status information for external or long-running processes.""" + +from typing import Optional +from enum import Enum +from datetime import datetime + +from dataclasses import dataclass, field, asdict + +from .agent import Agent, agent_factory +from .util import get_tzaware_utc_now + + +@dataclass +class ProcessStatus: + """Represents the status of a long-running remote process.""" + + class Status(Enum): + """Supported statuses.""" + + PENDING = 'pending' + """The process is waiting to start.""" + IN_PROGRESS = 'in_progress' + """Process has started, and is running remotely.""" + 
FAILED_TO_START = 'failed_to_start' + """Could not start the process.""" + FAILED = 'failed' + """The process failed while running.""" + FAILED_TO_END = 'failed_to_end' + """The process ran, but failed to end gracefully.""" + SUCCEEDED = 'succeeded' + """The process ended successfully.""" + TERMINATED = 'terminated' + """The process was terminated, e.g. cancelled by operator.""" + + creator: Agent + created: datetime + """Time when the process status was created (not the process itself).""" + process: str + step: Optional[str] = field(default=None) + status: Status = field(default=Status.PENDING) + reason: Optional[str] = field(default=None) + """Optional context or explanatory details related to the status.""" + + def __post_init__(self): + """Check our enums and agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + self.status = self.Status(self.status) diff --git a/core/arxiv/submission/domain/proposal.py b/core/arxiv/submission/domain/proposal.py new file mode 100644 index 0000000..d2f35d1 --- /dev/null +++ b/core/arxiv/submission/domain/proposal.py @@ -0,0 +1,64 @@ +""" +Proposals provide a mechanism for suggesting changes to submissions. + +The primary use-case in the classic submission & moderation system is for +suggesting changes to the primary or cross-list classification. Such proposals +are generated both automatically based on the results of the classifier and +manually by moderators. 
+""" + +from typing import Optional, Union, List +from datetime import datetime +import hashlib + +from dataclasses import dataclass, asdict, field +from enum import Enum + +from arxiv.taxonomy import Category + +from .annotation import Comment +from .util import get_tzaware_utc_now +from .agent import Agent, agent_factory + + +@dataclass +class Proposal: + """Represents a proposal to apply an event to a submission.""" + + class Status(Enum): + PENDING = 'pending' + REJECTED = 'rejected' + ACCEPTED = 'accepted' + + event_id: str + creator: Agent + created: datetime = field(default_factory=get_tzaware_utc_now) + # scope: str # TODO: document this. + proxy: Optional[Agent] = field(default=None) + + proposed_event_type: Optional[type] = field(default=None) + proposed_event_data: dict = field(default_factory=dict) + comments: List[Comment] = field(default_factory=list) + status: Status = field(default=Status.PENDING) + + @property + def proposal_type(self) -> str: + """Name (str) of the type of annotation.""" + return self.proposed_event_type.__name__ + + def __post_init__(self) -> None: + """Check our enums and agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + self.status = self.Status(self.status) + + def is_rejected(self) -> bool: + return self.status == self.Status.REJECTED + + def is_accepted(self) -> bool: + return self.status == self.Status.ACCEPTED + + def is_pending(self) -> bool: + return self.status == self.Status.PENDING diff --git a/core/arxiv/submission/domain/submission.py b/core/arxiv/submission/domain/submission.py new file mode 100644 index 0000000..a9a0199 --- /dev/null +++ b/core/arxiv/submission/domain/submission.py @@ -0,0 +1,515 @@ +"""Data structures for submissions.""" + +from typing import Optional, Dict, TypeVar, List, Iterable, Set, Union +from datetime import datetime +from dateutil.parser 
import parse as parse_date +from enum import Enum +import hashlib + +from dataclasses import dataclass, field, asdict + +from .agent import Agent, agent_factory +from .meta import License, Classification +from .annotation import Comment, Feature, Annotation, annotation_factory +from .proposal import Proposal +from .process import ProcessStatus +from .flag import Flag, flag_factory +from .util import get_tzaware_utc_now, dict_coerce, list_coerce +from .compilation import Compilation + + +@dataclass +class Author: + """Represents an author of a submission.""" + + order: int = field(default=0) + forename: str = field(default_factory=str) + surname: str = field(default_factory=str) + initials: str = field(default_factory=str) + affiliation: str = field(default_factory=str) + email: str = field(default_factory=str) + identifier: Optional[str] = field(default=None) + display: Optional[str] = field(default=None) + """ + Submitter may include a preferred display name for each author. + + If not provided, will be automatically generated from the other fields. 
+ """ + + def __post_init__(self) -> None: + """Auto-generate an identifier, if not provided.""" + if not self.identifier: + self.identifier = self._generate_identifier() + if not self.display: + self.display = self.canonical + + def _generate_identifier(self): + h = hashlib.new('sha1') + h.update(bytes(':'.join([self.forename, self.surname, self.initials, + self.affiliation, self.email]), + encoding='utf-8')) + return h.hexdigest() + + @property + def canonical(self): + """Canonical representation of the author name.""" + name = "%s %s %s" % (self.forename, self.initials, self.surname) + name = name.replace(' ', ' ') + if self.affiliation: + return "%s (%s)" % (name, self.affiliation) + return name + + +@dataclass +class SubmissionContent: + """Metadata about the submission source package.""" + + class Format(Enum): + """Supported source formats.""" + + UNKNOWN = None + """We could not determine the source format.""" + INVALID = "invalid" + """We are able to infer the source format, and it is not supported.""" + TEX = "tex" + """A flavor of TeX.""" + PDFTEX = "pdftex" + """A PDF derived from TeX.""" + POSTSCRIPT = "ps" + """A postscript source.""" + HTML = "html" + """An HTML source.""" + PDF = "pdf" + """A PDF-only source.""" + + identifier: str + checksum: str + uncompressed_size: int + compressed_size: int + source_format: Format = Format.UNKNOWN + + def __post_init__(self): + """Make sure that :attr:`.source_format` is a :class:`.Format`.""" + if self.source_format and type(self.source_format) is str: + self.source_format = self.Format(self.source_format) + + +@dataclass +class SubmissionMetadata: + """Metadata about a :class:`.domain.submission.Submission` instance.""" + + title: Optional[str] = None + abstract: Optional[str] = None + + authors: list = field(default_factory=list) + authors_display: str = field(default_factory=str) + """The canonical arXiv author string.""" + + doi: Optional[str] = None + msc_class: Optional[str] = None + acm_class: 
Optional[str] = None + report_num: Optional[str] = None + journal_ref: Optional[str] = None + + comments: str = field(default_factory=str) + + +@dataclass +class Delegation: + """Delegation of editing privileges to a non-owning :class:`.Agent`.""" + + delegate: Agent + creator: Agent + created: datetime = field(default_factory=get_tzaware_utc_now) + delegation_id: str = field(default_factory=str) + + def __post_init__(self): + """Set derivative fields.""" + self.delegation_id = self.get_delegation_id() + + def get_delegation_id(self): + """Generate unique identifier for the delegation instance.""" + h = hashlib.new('sha1') + h.update(b'%s:%s:%s' % (self.delegate.agent_identifier, + self.creator.agent_identifier, + self.created.isodate())) + return h.hexdigest() + + +@dataclass +class Hold: + """Represents a block on announcement, usually for QA/QC purposes.""" + + class Type(Enum): + """Supported holds in the submission system.""" + + PATCH = 'patch' + """A hold generated from the classic submission system.""" + + SOURCE_OVERSIZE = "source_oversize" + """The submission source is oversize.""" + + PDF_OVERSIZE = "pdf_oversize" + """The submission PDF is oversize.""" + + event_id: str + """The event that created the hold.""" + + creator: Agent + created: datetime = field(default_factory=get_tzaware_utc_now) + hold_type: Type = field(default=Type.PATCH) + hold_reason: Optional[str] = field(default_factory=str) + + def __post_init__(self): + """Check enums and agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + self.hold_type = self.Type(self.hold_type) + # if not isinstance(created, datetime): + # created = parse_date(created) + + +@dataclass +class Waiver: + """Represents an exception or override.""" + + event_id: str + """The identifier of the event that produced this waiver.""" + waiver_type: Hold.Type + waiver_reason: str + created: datetime + creator: Agent + + def __post_init__(self): + """Check enums and 
agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + self.waiver_type = Hold.Type(self.waiver_type) + + +# TODO: add identification mechanism; consider using mechanism similar to +# comments, below. +@dataclass +class UserRequest: + """Represents a user request related to a submission.""" + + WORKING = 'working' + """Request is not yet submitted.""" + + PENDING = 'pending' + """Request is pending approval.""" + + REJECTED = 'rejected' + """Request has been rejected.""" + + APPROVED = 'approved' + """Request has been approved.""" + + APPLIED = 'applied' + """Submission has been updated on the basis of the approved request.""" + + CANCELLED = 'cancelled' + + request_id: str + creator: Agent + created: datetime = field(default_factory=get_tzaware_utc_now) + updated: datetime = field(default_factory=get_tzaware_utc_now) + status: str = field(default=PENDING) + request_type: str = field(default_factory=str) + + def __post_init__(self): + """Check agents.""" + if self.creator and type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + self.request_type = self.get_request_type() + + def get_request_type(self) -> str: + """Name (str) of the type of user request.""" + return type(self).__name__ + + def is_pending(self) -> bool: + """Check whether the request is pending.""" + return self.status == UserRequest.PENDING + + def is_approved(self) -> bool: + """Check whether the request has been approved.""" + return self.status == UserRequest.APPROVED + + def is_applied(self) -> bool: + """Check whether the request has been applied.""" + return self.status == UserRequest.APPLIED + + def is_rejected(self) -> bool: + """Check whether the request has been rejected.""" + return self.status == UserRequest.REJECTED + + def is_active(self) -> bool: + """Check whether the request is active.""" + return self.is_pending() or self.is_approved() + + @classmethod + def generate_request_id(cls, submission: 
'Submission', N: int = -1) -> str: + """Generate a unique identifier for this request.""" + h = hashlib.new('sha1') + if N < 0: + N = len([rq for rq in submission.iter_requests if type(rq) is cls]) + h.update(f'{submission.submission_id}:{cls.NAME}:{N}'.encode('utf-8')) + return h.hexdigest() + + +@dataclass +class WithdrawalRequest(UserRequest): + """Represents a request to withdraw a submission.""" + + NAME = "Withdrawal" + + reason_for_withdrawal: Optional[str] = field(default=None) + """If an e-print is withdrawn, the submitter is asked to explain why.""" + + def apply(self, submission: 'Submission') -> 'Submission': + """Apply the withdrawal.""" + submission.reason_for_withdrawal = self.reason_for_withdrawal + submission.status = Submission.WITHDRAWN + return submission + + +@dataclass +class CrossListClassificationRequest(UserRequest): + """Represents a request to add secondary classifications.""" + + NAME = "Cross-list" + + classifications: List[Classification] = field(default_factory=list) + + def apply(self, submission: 'Submission') -> 'Submission': + """Apply the cross-list request.""" + submission.secondary_classification.extend(self.classifications) + return submission + + @property + def categories(self) -> List[str]: + """Get the requested cross-list categories.""" + return [c.category for c in self.classifications] + + +@dataclass +class Submission: + """ + Represents an arXiv submission object. + + Some notable differences between this view of submissions and the classic + model: + + - There is no "hold" status. Status reflects where the submission is + in the pipeline. Holds are annotations that can be applied to the + submission, and may impact its ability to proceed (e.g. from submitted + to scheduled). Submissions that are in working status can have holds on + them! + - We use `arxiv_id` instead of `paper_id` to refer to the canonical arXiv + identifier for the e-print (once it is announced). 
+ - Instead of having a separate "submission" record for every change to an + e-print (e.g. replacement, jref, etc), we represent the entire history + as a single submission. Announced versions can be found in + :attr:`.versions`. Withdrawal and cross-list requests can be found in + :attr:`.user_requests`. JREFs are treated like they "just happen", + reflecting the forthcoming move away from storing journal ref information + in the core metadata record. + + """ + + WORKING = 'working' + SUBMITTED = 'submitted' + SCHEDULED = 'scheduled' + ANNOUNCED = 'announced' + ERROR = 'error' # TODO: eliminate this status. + DELETED = 'deleted' + WITHDRAWN = 'withdrawn' + + creator: Agent + owner: Agent + proxy: Optional[Agent] = field(default=None) + client: Optional[Agent] = field(default=None) + created: Optional[datetime] = field(default=None) + updated: Optional[datetime] = field(default=None) + submitted: Optional[datetime] = field(default=None) + submission_id: Optional[int] = field(default=None) + + source_content: Optional[SubmissionContent] = field(default=None) + metadata: SubmissionMetadata = field(default_factory=SubmissionMetadata) + primary_classification: Optional[Classification] = field(default=None) + secondary_classification: List[Classification] = \ + field(default_factory=list) + submitter_contact_verified: bool = field(default=False) + submitter_is_author: Optional[bool] = field(default=None) + submitter_accepts_policy: Optional[bool] = field(default=None) + submitter_compiled_preview: bool = field(default=False) + submitter_confirmed_preview: bool = field(default=False) + license: Optional[License] = field(default=None) + status: str = field(default=WORKING) + """Disposition within the submission pipeline.""" + + arxiv_id: Optional[str] = field(default=None) + """The announced arXiv paper ID.""" + + version: int = field(default=1) + + reason_for_withdrawal: Optional[str] = field(default=None) + """If an e-print is withdrawn, the submitter is asked to 
explain why.""" + + versions: List['Submission'] = field(default_factory=list) + """Announced versions of this :class:`.domain.submission.Submission`.""" + + # These fields are related to moderation/quality control. + user_requests: Dict[str, UserRequest] = field(default_factory=dict) + """Requests from the owner for changes that require approval.""" + + proposals: Dict[str, Proposal] = field(default_factory=dict) + """Proposed changes to the submission, e.g. reclassification.""" + + processes: List[ProcessStatus] = field(default_factory=list) + """Information about automated processes.""" + + annotations: Dict[str, Annotation] = field(default_factory=dict) + """Quality control annotations.""" + + flags: Dict[str, Flag] = field(default_factory=dict) + """Quality control flags.""" + + comments: Dict[str, Comment] = field(default_factory=dict) + """Moderation/administrative comments.""" + + holds: Dict[str, Hold] = field(default_factory=dict) + """Quality control holds.""" + + waivers: Dict[str, Waiver] = field(default_factory=dict) + """Quality control waivers.""" + + @property + def features(self) -> Dict[str, Feature]: + return {k: v for k, v in self.annotations.items() + if isinstance(v, Feature)} + + @property + def is_active(self) -> bool: + """Actively moving through the submission workflow.""" + return self.status not in [self.DELETED, self.ANNOUNCED] + + @property + def is_announced(self) -> bool: + """The submission has been announced.""" + return self.status == self.ANNOUNCED + + @property + def is_finalized(self) -> bool: + """Submitter has indicated submission is ready for publication.""" + return self.status not in [self.WORKING, self.DELETED] + + @property + def is_deleted(self) -> bool: + """Submission is removed.""" + return self.status == self.DELETED + + @property + def primary_category(self) -> str: + return self.primary_classification.category + + @property + def secondary_categories(self) -> List[str]: + """Category names from secondary 
classifications.""" + return [c.category for c in self.secondary_classification] + + @property + def is_on_hold(self) -> bool: + # We need to explicitly check ``status`` here because classic doesn't + # have a representation for Hold events. + return (self.status == self.SUBMITTED + and len(self.hold_types - self.waiver_types) > 0) + + def has_waiver_for(self, hold_type: Hold.Type) -> bool: + return hold_type in self.waiver_types + + @property + def hold_types(self) -> Set[Hold.Type]: + return set([hold.hold_type for hold in self.holds.values()]) + + @property + def waiver_types(self) -> Set[Hold.Type]: + return set([waiver.hold_type for waiver in self.waivers.values()]) + + @property + def has_active_requests(self) -> bool: + return len(self.active_user_requests) > 0 + + @property + def iter_requests(self) -> Iterable[UserRequest]: + return self.user_requests.values() + + @property + def active_user_requests(self) -> List[UserRequest]: + return sorted(filter(lambda r: r.is_active(), self.iter_requests), + key=lambda r: r.created) + + @property + def pending_user_requests(self) -> List[UserRequest]: + return sorted(filter(lambda r: r.is_pending(), self.iter_requests), + key=lambda r: r.created) + + @property + def rejected_user_requests(self) -> List[UserRequest]: + return sorted(filter(lambda r: r.is_rejected(), self.iter_requests), + key=lambda r: r.created) + + @property + def approved_user_requests(self) -> List[UserRequest]: + return sorted(filter(lambda r: r.is_approved(), self.iter_requests), + key=lambda r: r.created) + + @property + def applied_user_requests(self) -> List[UserRequest]: + return sorted(filter(lambda r: r.is_applied(), self.iter_requests), + key=lambda r: r.created) + + def __post_init__(self): + if type(self.creator) is dict: + self.creator = agent_factory(**self.creator) + if type(self.owner) is dict: + self.owner = agent_factory(**self.owner) + if self.proxy and type(self.proxy) is dict: + self.proxy = agent_factory(**self.proxy) + if 
self.client and type(self.client) is dict: + self.client = agent_factory(**self.client) + if type(self.created) is str: + self.created = parse_date(self.created) + if type(self.updated) is str: + self.updated = parse_date(self.updated) + if type(self.submitted) is str: + self.submitted = parse_date(self.submitted) + if type(self.source_content) is dict: + self.source_content = SubmissionContent(**self.source_content) + if type(self.primary_classification) is dict: + self.primary_classification = \ + Classification(**self.primary_classification) + if type(self.metadata) is dict: + self.metadata = SubmissionMetadata(**self.metadata) + # self.delegations = dict_coerce(Delegation, self.delegations) + self.secondary_classification = \ + list_coerce(Classification, self.secondary_classification) + if type(self.license) is dict: + self.license = License(**self.license) + self.versions = list_coerce(Submission, self.versions) + self.user_requests = dict_coerce(request_factory, self.user_requests) + self.proposals = dict_coerce(Proposal, self.proposals) + self.processes = list_coerce(ProcessStatus, self.processes) + self.annotations = dict_coerce(annotation_factory, self.annotations) + self.flags = dict_coerce(flag_factory, self.flags) + self.comments = dict_coerce(Comment, self.comments) + self.holds = dict_coerce(Hold, self.holds) + self.waivers = dict_coerce(Waiver, self.waivers) + + +def request_factory(**data: dict) -> UserRequest: + """Generate a :class:`.UserRequest` from raw data.""" + for cls in UserRequest.__subclasses__(): + if data['request_type'] == cls.__name__: + return cls(**data) + raise ValueError('Invalid request type') diff --git a/core/arxiv/submission/domain/tests/__init__.py b/core/arxiv/submission/domain/tests/__init__.py new file mode 100644 index 0000000..5aa0a13 --- /dev/null +++ b/core/arxiv/submission/domain/tests/__init__.py @@ -0,0 +1 @@ +"""Tests for :mod:`arxiv.submission.domain`.""" diff --git 
a/core/arxiv/submission/domain/tests/test_events.py b/core/arxiv/submission/domain/tests/test_events.py new file mode 100644 index 0000000..73e1979 --- /dev/null +++ b/core/arxiv/submission/domain/tests/test_events.py @@ -0,0 +1,913 @@ +"""Tests for :class:`.Event` instances in :mod:`arxiv.submission.domain.event`.""" + +from unittest import TestCase, mock +from datetime import datetime +from pytz import UTC +from mimesis import Text + +from arxiv import taxonomy +from ... import save +from .. import event, agent, submission, meta +from ...exceptions import InvalidEvent + + +class TestWithdrawalSubmission(TestCase): + """Test :class:`event.RequestWithdrawal`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User( + 12345, + 'uuser@cornell.edu', + endorsements=[meta.Classification('astro-ph.GA'), + meta.Classification('astro-ph.CO')] + ) + self.submission = submission.Submission( + submission_id=1, + status=submission.Submission.ANNOUNCED, + creator=self.user, + owner=self.user, + created=datetime.now(UTC), + source_content=submission.SubmissionContent( + identifier='6543', + source_format=submission.SubmissionContent.Format('pdf'), + checksum='asdf2345', + uncompressed_size=594930, + compressed_size=594930 + ), + primary_classification=meta.Classification('astro-ph.GA'), + secondary_classification=[meta.Classification('astro-ph.CO')], + license=meta.License(uri='http://free', name='free'), + arxiv_id='1901.001234', + version=1, + submitter_contact_verified=True, + submitter_is_author=True, + submitter_accepts_policy=True, + submitter_confirmed_preview=True, + metadata=submission.SubmissionMetadata( + title='the best title', + abstract='very abstract', + authors_display='J K Jones, F W Englund', + doi='10.1000/182', + comments='These are the comments' + ) + ) + + def test_request_withdrawal(self): + """Request that a paper be withdrawn.""" + e = event.RequestWithdrawal(creator=self.user, reason="no good") + 
e.validate(self.submission) + replacement = e.apply(self.submission) + self.assertEqual(replacement.arxiv_id, self.submission.arxiv_id) + self.assertEqual(replacement.version, self.submission.version) + self.assertEqual(replacement.status, + submission.Submission.ANNOUNCED) + self.assertTrue(replacement.has_active_requests) + self.assertTrue(self.submission.is_announced) + self.assertTrue(replacement.is_announced) + + def test_request_without_a_reason(self): + """A reason is required.""" + e = event.RequestWithdrawal(creator=self.user) + with self.assertRaises(event.InvalidEvent): + e.validate(self.submission) + + def test_request_without_announced_submission(self): + """The submission must already be announced.""" + e = event.RequestWithdrawal(creator=self.user, reason="no good") + with self.assertRaises(event.InvalidEvent): + e.validate(mock.MagicMock(announced=False)) + + +class TestReplacementSubmission(TestCase): + """Test :class:`event.CreateSubmission` with a replacement.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User( + 12345, + 'uuser@cornell.edu', + endorsements=[meta.Classification('astro-ph.GA'), + meta.Classification('astro-ph.CO')] + ) + self.submission = submission.Submission( + submission_id=1, + status=submission.Submission.ANNOUNCED, + creator=self.user, + owner=self.user, + created=datetime.now(UTC), + source_content=submission.SubmissionContent( + identifier='6543', + source_format=submission.SubmissionContent.Format('pdf'), + checksum='asdf2345', + uncompressed_size=594930, + compressed_size=594930 + ), + primary_classification=meta.Classification('astro-ph.GA'), + secondary_classification=[meta.Classification('astro-ph.CO')], + license=meta.License(uri='http://free', name='free'), + arxiv_id='1901.001234', + version=1, + submitter_contact_verified=True, + submitter_is_author=True, + submitter_accepts_policy=True, + submitter_confirmed_preview=True, + metadata=submission.SubmissionMetadata( + 
title='the best title', + abstract='very abstract', + authors_display='J K Jones, F W Englund', + doi='10.1000/182', + comments='These are the comments' + ) + ) + + def test_create_submission_replacement(self): + """A replacement is a new submission based on an old submission.""" + e = event.CreateSubmissionVersion(creator=self.user) + replacement = e.apply(self.submission) + self.assertEqual(replacement.arxiv_id, self.submission.arxiv_id) + self.assertEqual(replacement.version, self.submission.version + 1) + self.assertEqual(replacement.status, submission.Submission.WORKING) + self.assertTrue(self.submission.is_announced) + self.assertFalse(replacement.is_announced) + + self.assertIsNone(replacement.source_content) + + # The user is asked to reaffirm these points. + self.assertFalse(replacement.submitter_contact_verified) + self.assertFalse(replacement.submitter_accepts_policy) + self.assertFalse(replacement.submitter_confirmed_preview) + self.assertFalse(replacement.submitter_contact_verified) + + # These should all stay the same. 
+ self.assertEqual(replacement.metadata.title, + self.submission.metadata.title) + self.assertEqual(replacement.metadata.abstract, + self.submission.metadata.abstract) + self.assertEqual(replacement.metadata.authors, + self.submission.metadata.authors) + self.assertEqual(replacement.metadata.authors_display, + self.submission.metadata.authors_display) + self.assertEqual(replacement.metadata.msc_class, + self.submission.metadata.msc_class) + self.assertEqual(replacement.metadata.acm_class, + self.submission.metadata.acm_class) + self.assertEqual(replacement.metadata.doi, + self.submission.metadata.doi) + self.assertEqual(replacement.metadata.journal_ref, + self.submission.metadata.journal_ref) + + +class TestDOIorJREFAfterAnnounce(TestCase): + """Test :class:`event.SetDOI` or :class:`event.SetJournalReference`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User( + 12345, + 'uuser@cornell.edu', + endorsements=[meta.Classification('astro-ph.GA'), + meta.Classification('astro-ph.CO')] + ) + self.submission = submission.Submission( + submission_id=1, + status=submission.Submission.ANNOUNCED, + creator=self.user, + owner=self.user, + created=datetime.now(UTC), + source_content=submission.SubmissionContent( + identifier='6543', + source_format=submission.SubmissionContent.Format('pdf'), + checksum='asdf2345', + uncompressed_size=594930, + compressed_size=594930 + ), + primary_classification=meta.Classification('astro-ph.GA'), + secondary_classification=[meta.Classification('astro-ph.CO')], + license=meta.License(uri='http://free', name='free'), + arxiv_id='1901.001234', + version=1, + submitter_contact_verified=True, + submitter_is_author=True, + submitter_accepts_policy=True, + submitter_confirmed_preview=True, + metadata=submission.SubmissionMetadata( + title='the best title', + abstract='very abstract', + authors_display='J K Jones, F W Englund', + doi='10.1000/182', + comments='These are the comments' + ) + ) + + def 
test_create_submission_jref(self): + """A JREF is just like a replacement, but different.""" + e = event.SetDOI(creator=self.user, doi='10.1000/182') + after = e.apply(self.submission) + self.assertEqual(after.arxiv_id, self.submission.arxiv_id) + self.assertEqual(after.version, self.submission.version) + self.assertEqual(after.status, submission.Submission.ANNOUNCED) + self.assertTrue(self.submission.is_announced) + self.assertTrue(after.is_announced) + + self.assertIsNotNone(after.submission_id) + self.assertEqual(self.submission.submission_id, after.submission_id) + + # The user is NOT asked to reaffirm these points. + self.assertTrue(after.submitter_contact_verified) + self.assertTrue(after.submitter_accepts_policy) + self.assertTrue(after.submitter_confirmed_preview) + self.assertTrue(after.submitter_contact_verified) + + # These should all stay the same. + self.assertEqual(after.metadata.title, + self.submission.metadata.title) + self.assertEqual(after.metadata.abstract, + self.submission.metadata.abstract) + self.assertEqual(after.metadata.authors, + self.submission.metadata.authors) + self.assertEqual(after.metadata.authors_display, + self.submission.metadata.authors_display) + self.assertEqual(after.metadata.msc_class, + self.submission.metadata.msc_class) + self.assertEqual(after.metadata.acm_class, + self.submission.metadata.acm_class) + self.assertEqual(after.metadata.doi, + self.submission.metadata.doi) + self.assertEqual(after.metadata.journal_ref, + self.submission.metadata.journal_ref) + + + +class TestSetPrimaryClassification(TestCase): + """Test :class:`event.SetPrimaryClassification`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User( + 12345, + 'uuser@cornell.edu', + endorsements=[meta.Classification('astro-ph.GA'), + meta.Classification('astro-ph.CO')] + ) + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def 
test_set_primary_with_nonsense(self): + """Category is not from the arXiv taxonomy.""" + e = event.SetPrimaryClassification( + creator=self.user, + submission_id=1, + category="nonsense" + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". + + def test_set_primary_with_valid_category(self): + """Category is from the arXiv taxonomy.""" + for category in taxonomy.CATEGORIES.keys(): + e = event.SetPrimaryClassification( + creator=self.user, + submission_id=1, + category=category + ) + if category in self.user.endorsements: + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail("Event should be valid") + else: + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_set_primary_already_secondary(self): + """Category is already set as a secondary.""" + classification = submission.Classification('cond-mat.dis-nn') + self.submission.secondary_classification.append(classification) + e = event.SetPrimaryClassification( + creator=self.user, + submission_id=1, + category='cond-mat.dis-nn' + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". + + +class TestAddSecondaryClassification(TestCase): + """Test :class:`event.AddSecondaryClassification`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC), + secondary_classification=[] + ) + + def test_add_secondary_with_nonsense(self): + """Category is not from the arXiv taxonomy.""" + e = event.AddSecondaryClassification( + creator=self.user, + submission_id=1, + category="nonsense" + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". 
+ + def test_add_secondary_with_valid_category(self): + """Category is from the arXiv taxonomy.""" + for category in taxonomy.CATEGORIES_ACTIVE.keys(): + e = event.AddSecondaryClassification( + creator=self.user, + submission_id=1, + category=category + ) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail("Event should be valid") + + def test_add_secondary_already_present(self): + """Category is already present on the submission.""" + self.submission.secondary_classification.append( + submission.Classification('cond-mat.dis-nn') + ) + e = event.AddSecondaryClassification( + creator=self.user, + submission_id=1, + category='cond-mat.dis-nn' + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". + + def test_add_secondary_already_primary(self): + """Category is already set as primary.""" + classification = submission.Classification('cond-mat.dis-nn') + self.submission.primary_classification = classification + + e = event.AddSecondaryClassification( + creator=self.user, + submission_id=1, + category='cond-mat.dis-nn' + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". + + +class TestRemoveSecondaryClassification(TestCase): + """Test :class:`event.RemoveSecondaryClassification`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC), + secondary_classification=[] + ) + + def test_add_secondary_with_nonsense(self): + """Category is not from the arXiv taxonomy.""" + e = event.RemoveSecondaryClassification( + creator=self.user, + submission_id=1, + category="nonsense" + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". 
+ + def test_remove_secondary_with_valid_category(self): + """Category is from the arXiv taxonomy.""" + classification = submission.Classification('cond-mat.dis-nn') + self.submission.secondary_classification.append(classification) + e = event.RemoveSecondaryClassification( + creator=self.user, + submission_id=1, + category='cond-mat.dis-nn' + ) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail("Event should be valid") + + def test_remove_secondary_not_present(self): + """Category is not present.""" + e = event.RemoveSecondaryClassification( + creator=self.user, + submission_id=1, + category='cond-mat.dis-nn' + ) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) # "Event should not be valid". + + +class TestSetAuthors(TestCase): + """Test :class:`event.SetAuthors`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_canonical_authors_provided(self): + """Data includes canonical author display string.""" + e = event.SetAuthors(creator=self.user, + submission_id=1, + authors=[submission.Author()], + authors_display="Foo authors") + try: + e.validate(self.submission) + except Exception as e: + self.fail(str(e), "Data should be valid") + s = e.project(self.submission) + self.assertEqual(s.metadata.authors_display, e.authors_display, + "Authors string should be updated") + + def test_canonical_authors_not_provided(self): + """Data does not include canonical author display string.""" + e = event.SetAuthors( + creator=self.user, + submission_id=1, + authors=[ + submission.Author( + forename="Bob", + surname="Paulson", + affiliation="FSU" + ) + ]) + self.assertEqual(e.authors_display, "Bob Paulson (FSU)", + "Display string should be generated automagically") + + try: + e.validate(self.submission) + except 
Exception as e: + self.fail(str(e), "Data should be valid") + s = e.project(self.submission) + self.assertEqual(s.metadata.authors_display, e.authors_display, + "Authors string should be updated") + + def test_canonical_authors_contains_et_al(self): + """Author display value contains et al.""" + e = event.SetAuthors(creator=self.user, + submission_id=1, + authors=[submission.Author()], + authors_display="Foo authors, et al") + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + +class TestSetTitle(TestCase): + """Tests for :class:`.event.SetTitle`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_empty_value(self): + """Title is set to an empty string.""" + e = event.SetTitle(creator=self.user, title='') + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_reasonable_title(self): + """Title is set to some reasonable value smaller than 240 chars.""" + for _ in range(100): # Add a little fuzz to the mix. + for locale in LOCALES: + title = Text(locale=locale).text(6)[:240] \ + .strip() \ + .rstrip('.') \ + .replace('@', '') \ + .replace('#', '') \ + .title() + e = event.SetTitle(creator=self.user, title=title) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle title: %s' % title) + + def test_all_caps_title(self): + """Title is all uppercase.""" + title = Text().title()[:240].upper() + e = event.SetTitle(creator=self.user, title=title) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_title_ends_with_period(self): + """Title ends with a period.""" + title = Text().title()[:239] + "." 
+ e = event.SetTitle(creator=self.user, title=title) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_title_ends_with_ellipsis(self): + """Title ends with an ellipsis.""" + title = Text().title()[:236] + "..." + e = event.SetTitle(creator=self.user, title=title) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail("Should accept ellipsis") + + def test_huge_title(self): + """Title is set to something unreasonably large.""" + title = Text().text(200) # 200 sentences. + e = event.SetTitle(creator=self.user, title=title) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_title_with_html_escapes(self): + """Title should not allow HTML escapes.""" + e = event.SetTitle(creator=self.user, title='foo   title') + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + +class TestSetAbstract(TestCase): + """Tests for :class:`.event.SetAbstract`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_empty_value(self): + """Abstract is set to an empty string.""" + e = event.SetAbstract(creator=self.user, abstract='') + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + def test_reasonable_abstract(self): + """Abstract is set to some reasonable value smaller than 1920 chars.""" + for locale in LOCALES: + abstract = Text(locale=locale).text(20)[:1920] + e = event.SetAbstract(creator=self.user, abstract=abstract) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle abstract: %s' % abstract) + + def test_huge_abstract(self): + """Abstract is set to something unreasonably large.""" + abstract = Text().text(200) # 200 sentences. 
+ e = event.SetAbstract(creator=self.user, abstract=abstract) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + +class TestSetDOI(TestCase): + """Tests for :class:`.event.SetDOI`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_empty_doi(self): + """DOI is set to an empty string.""" + doi = "" + e = event.SetDOI(creator=self.user, doi=doi) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle valid DOI: %s' % e) + + def test_valid_doi(self): + """DOI is set to a single valid DOI.""" + doi = "10.1016/S0550-3213(01)00405-9" + e = event.SetDOI(creator=self.user, doi=doi) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle valid DOI: %s' % e) + + def test_multiple_valid_dois(self): + """DOI is set to multiple valid DOIs.""" + doi = "10.1016/S0550-3213(01)00405-9, 10.1016/S0550-3213(01)00405-8" + e = event.SetDOI(creator=self.user, doi=doi) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle valid DOI: %s' % e) + + def test_invalid_doi(self): + """DOI is set to something other than a valid DOI.""" + not_a_doi = "101016S0550-3213(01)00405-9" + e = event.SetDOI(creator=self.user, doi=not_a_doi) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + +class TestSetReportNumber(TestCase): + """Tests for :class:`.event.SetReportNumber`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_valid_report_number(self): + """Valid report number values are used.""" + values = 
[ + "IPhT-T10/027", + "SITP 10/04, OIQP-10-01", + "UK/09-07", + "COLO-HEP-550, UCI-TR-2009-12", + "TKYNT-10-01, UTHEP-605", + "1003.1130", + "CDMTCS-379", + "BU-HEPP-09-06", + "IMSC-PHYSICS/08-2009, CU-PHYSICS/2-2010", + "CRM preprint No. 867", + "SLAC-PUB-13848, AEI-2009-110, ITP-UH-18/09", + "SLAC-PUB-14011", + "KUNS-2257, DCPT-10/11", + "TTP09-41, SFB/CPP-09-110, Alberta Thy 16-09", + "DPUR/TH/20", + "KEK Preprint 2009-41, Belle Preprint 2010-02, NTLP Preprint 2010-01", + "CERN-PH-EP/2009-018", + "Computer Science ISSN 19475500", + "Computer Science ISSN 19475500", + "Computer Science ISSN 19475500", + "" + ] + for value in values: + try: + e = event.SetReportNumber(creator=self.user, report_num=value) + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle %s: %s' % (value, e)) + + def test_invalid_values(self): + """Some invalid values are passed.""" + values = [ + "not a report number", + ] + for value in values: + with self.assertRaises(InvalidEvent): + e = event.SetReportNumber(creator=self.user, report_num=value) + e.validate(self.submission) + + +class TestSetJournalReference(TestCase): + """Tests for :class:`.event.SetJournalReference`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_valid_journal_ref(self): + """Valid journal ref values are used.""" + values = [ + "Phys. Rev. Lett. 104, 097003 (2010)", + "Phys. Rev. B v81, 094405 (2010)", + "Phys. Rev. D81 (2010) 036004", + "Phys. Rev. A 74, 033822 (2006)Phys. Rev. A 74, 033822 (2006)Phys. Rev. A 74, 033822 (2006)Phys. Rev. A 81, 032303 (2010)", + "Opt. Lett. 35, 499-501 (2010)", + "Phys. Rev. D 81, 034023 (2010)", + "Opt. Lett. Vol.31 (2010)", + "Fundamental and Applied Mathematics, 14(8)(2008), 55-67. 
(in Russian)", + "Czech J Math, 60(135)(2010), 59-76.", + "PHYSICAL REVIEW B 81, 024520 (2010)", + "PHYSICAL REVIEW B 69, 094524 (2004)", + "Announced on Ap&SS, Oct. 2009", + "Phys. Rev. Lett. 104, 095701 (2010)", + "Phys. Rev. B 76, 205407 (2007).", + "Extending Database Technology (EDBT) 2010", + "Database and Expert Systems Applications (DEXA) 2009", + "J. Math. Phys. 51 (2010), no. 3, 033503, 12pp", + "South East Asian Bulletin of Mathematics, Vol. 33 (2009), 853-864.", + "Acta Mathematica Academiae Paedagogiace Nyíregyháziensis, Vol. 25, No. 2 (2009), 189-190.", + "Creative Mathematics and Informatics, Vol. 18, No. 1 (2009), 39-45.", + "" + ] + for value in values: + try: + e = event.SetJournalReference(creator=self.user, + journal_ref=value) + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle %s: %s' % (value, e)) + + def test_invalid_values(self): + """Some invalid values are passed.""" + values = [ + "Phys. Rev. Lett. 104, 097003 ()", + "Phys. Rev. accept submit B v81, 094405 (2010)", + "Phys. Rev. 
D81 036004", + ] + for value in values: + with self.assertRaises(InvalidEvent): + e = event.SetJournalReference(creator=self.user, + journal_ref=value) + e.validate(self.submission) + + +class TestSetACMClassification(TestCase): + """Tests for :class:`.event.SetACMClassification`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_valid_acm_class(self): + """ACM classification value is valid.""" + values = [ + "H.2.4", + "F.2.2; H.3.m", + "H.2.8", + "H.2.4", + "G.2.1", + "D.1.1", + "G.2.2", + "C.4", + "I.2.4", + "I.6.3", + "D.2.8", + "B.7.2", + "D.2.4; D.3.1; D.3.2; F.3.2", + "F.2.2; I.2.7", + "G.2.2", + "D.3.1; F.3.2", + "F.4.1; F.4.2", + "C.2.1; G.2.2", + "F.2.2; G.2.2; G.3; I.6.1; J.3 ", + "H.2.8; K.4.4; H.3.5", + "" + ] + for value in values: + try: + e = event.SetACMClassification(creator=self.user, + acm_class=value) + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle %s: %s' % (value, e)) + + +class TestSetMSCClassification(TestCase): + """Tests for :class:`.event.SetMSCClassification`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_valid_msc_class(self): + """MSC classification value is valid.""" + values = [ + "57M25", + "35k55; 35k65", + "60G51", + "16S15, 13P10, 17A32, 17A99", + "16S15, 13P10, 17A30", + "05A15 ; 30F10 ; 30D05", + "16S15, 13P10, 17A01, 17B67, 16D10", + "primary 05A15 ; secondary 30F10, 30D05.", + "35B45 (Primary), 35J40 (Secondary)", + "13D45, 13C14, 13Exx", + "13D45, 13C14", + "57M25; 05C50", + "32G34 (Primary), 14D07 (Secondary)", + "05C75, 60G09", + "14H20; 
13A18; 13F30", + "49K10; 26A33; 26B20", + "20NO5, 08A05", + "20NO5 (Primary), 08A05 (Secondary)", + "83D05", + "20NO5; 08A05" + ] + for value in values: + try: + e = event.SetMSCClassification(creator=self.user, + msc_class=value) + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle %s: %s' % (value, e)) + + +class TestSetComments(TestCase): + """Tests for :class:`.event.SetComments`.""" + + def setUp(self): + """Initialize auxiliary data for test cases.""" + self.user = agent.User(12345, 'uuser@cornell.edu') + self.submission = submission.Submission( + submission_id=1, + creator=self.user, + owner=self.user, + created=datetime.now(UTC) + ) + + def test_empty_value(self): + """Comment is set to an empty string.""" + e = event.SetComments(creator=self.user, comments='') + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle empty comments') + + def test_reasonable_comment(self): + """Comment is set to some reasonable value smaller than 400 chars.""" + for locale in LOCALES: + comments = Text(locale=locale).text(20)[:400] + e = event.SetComments(creator=self.user, comments=comments) + try: + e.validate(self.submission) + except InvalidEvent as e: + self.fail('Failed to handle comments: %s' % comments) + + def test_huge_comment(self): + """Comment is set to something unreasonably large.""" + comments = Text().text(200) # 200 sentences. + e = event.SetComments(creator=self.user, comments=comments) + with self.assertRaises(InvalidEvent): + e.validate(self.submission) + + +# Locales supported by mimesis. 
+LOCALES = [
+    "cs",
+    "da",
+    "de",
+    "de-at",
+    "de-ch",
+    "el",
+    "en",
+    "en-au",
+    "en-ca",
+    "en-gb",
+    "es",
+    "es-mx",
+    "et",
+    "fa",
+    "fi",
+    "fr",
+    "hu",
+    "is",
+    "it",
+    "ja",
+    "kk",
+    "ko",
+    "nl",
+    "nl-be",
+    "no",
+    "pl",
+    "pt",
+    "pt-br",
+    "ru",
+    "sv",
+    "tr",
+    "uk",
+    "zh",
+]
diff --git a/core/arxiv/submission/domain/util.py b/core/arxiv/submission/domain/util.py
new file mode 100644
index 0000000..b8987c0
--- /dev/null
+++ b/core/arxiv/submission/domain/util.py
@@ -0,0 +1,30 @@
+"""Helpers and utilities."""
+
+from typing import Dict, Any, List, Optional, Callable
+from datetime import datetime
+from pytz import UTC
+
+
+def get_tzaware_utc_now() -> datetime:
+    """Generate a timezone-aware datetime for the current moment in UTC."""
+    return datetime.now(UTC)
+
+
+def dict_coerce(factory: Callable[..., Any], data: dict) -> Dict[str, Any]:
+    """
+    Pass each ``dict`` value of ``data`` to ``factory`` as keyword arguments.
+
+    Keys are preserved; values that are not dicts are passed through as-is.
+    """
+    return {key: factory(**value) if isinstance(value, dict) else value
+            for key, value in data.items()}
+
+
+def list_coerce(factory: Callable[..., Any], data: list) -> List[Any]:
+    """
+    Pass each ``dict`` item of ``data`` to ``factory`` as keyword arguments.
+
+    Unlike :func:`dict_coerce`, items that are not dicts are left out of the
+    result instead of being passed through.
+    """
+    return [factory(**value) for value in data if isinstance(value, dict)]
diff --git a/core/arxiv/submission/exceptions.py b/core/arxiv/submission/exceptions.py
new file mode 100644
index 0000000..ce53779
--- /dev/null
+++ b/core/arxiv/submission/exceptions.py
@@ -0,0 +1,28 @@
+"""Exceptions raised during event handling."""
+
+from typing import TypeVar, List
+
+EventType = TypeVar('EventType', bound='core.events.domain.event.Event')
+
+
+class InvalidEvent(ValueError):
+    """Raised when an invalid event is encountered."""
+
+    def __init__(self, event: EventType, message: str = '') -> None:
+        """Use the :class:`.Event` to build an error message."""
+        self.event: EventType = event
+        self.message = message
+        r = f"Invalid {event.event_type}: {message}"
+        super(InvalidEvent, self).__init__(r)
+
+
+class NoSuchSubmission(Exception):
+    """An operation was performed on/for a submission that does not exist."""
+
+
+class SaveError(RuntimeError):
+    """Failed to persist event state."""
+
+
+class NothingToDo(RuntimeError):
+    """There is nothing to do."""
diff --git a/core/arxiv/submission/schedule.py b/core/arxiv/submission/schedule.py
new file mode 100644
index 0000000..fd78b1a
--- /dev/null
+++ b/core/arxiv/submission/schedule.py
@@ -0,0 +1,112 @@
+"""
+Policies for announcement scheduling.
+
+Submissions to arXiv are normally made public on Sunday through Thursday, with
+no announcements Friday or Saturday.
+
++-----------------------+----------------+------------------------------------+
+| Received Between (ET) | Announced (ET) | Mailed                             |
++=======================+================+====================================+
+| Mon 14:00 - Tue 14:00 | Tue 20:00      | Tuesday Night / Wednesday Morning  |
+| Tue 14:00 - Wed 14:00 | Wed 20:00      | Wednesday Night / Thursday Morning |
+| Wed 14:00 - Thu 14:00 | Thu 20:00      | Thursday Night / Friday Morning    |
+| Thu 14:00 - Fri 14:00 | Sun 20:00      | Sunday Night / Monday Morning      |
+| Fri 14:00 - Mon 14:00 | Mon 20:00      | Monday Night / Tuesday Morning     |
++-----------------------+----------------+------------------------------------+
+
+"""
+
+from typing import Optional, Tuple
+from datetime import datetime, timedelta
+from enum import IntEnum
+from pytz import timezone, UTC
+
+ET = timezone('US/Eastern')
+
+Weekdays = IntEnum('Weekdays', 'Mon Tue Wed Thu Fri Sat Sun', start=1)
+
+ANNOUNCE_TIME = 20
+FREEZE_TIME = 14
+
+# Each window is (start, end, announce); each member is an (isoweekday, hour)
+# pair in Eastern time. Weekday values outside 1-7 point into the previous or
+# the following week, so the weekend window matches from either side of the
+# Sunday/Monday boundary.
+WINDOWS = [
+    ((Weekdays.Fri - 7, 14), (Weekdays.Mon, 14), (Weekdays.Mon, 20)),
+    ((Weekdays.Mon, 14), (Weekdays.Tue, 14), (Weekdays.Tue, 20)),
+    ((Weekdays.Tue, 14), (Weekdays.Wed, 14), (Weekdays.Wed, 20)),
+    ((Weekdays.Wed, 14), (Weekdays.Thu, 14), (Weekdays.Thu, 20)),
+    ((Weekdays.Thu, 14), (Weekdays.Fri, 14), (Weekdays.Sun, 20)),
+    ((Weekdays.Fri, 14), (Weekdays.Mon + 7, 14), (Weekdays.Mon + 7, 20)),
+]
+
+
+def _datetime(ref: datetime, isoweekday: int, hour: int) -> datetime:
+    """
+    Get ``hour`` o'clock ET on ``isoweekday``, relative to the week of ``ref``.
+
+    ``ref`` must already be expressed in :const:`ET`. The wall-clock time is
+    re-localized instead of being shifted as an aware datetime, because pytz
+    does not update the UTC offset during arithmetic; results on the far side
+    of a daylight-saving transition would otherwise be off by an hour.
+    """
+    days_hence = isoweekday - ref.isoweekday()
+    wall = ref.replace(tzinfo=None) + timedelta(days=days_hence)
+    wall = wall.replace(hour=hour, minute=0, second=0, microsecond=0)
+    return ET.localize(wall)
+
+
+def _in_eastern(ref: Optional[datetime]) -> datetime:
+    """Express ``ref`` (by default, the current moment) in Eastern time."""
+    if ref is None:
+        # ``datetime.now()`` is in the host's zone, which need not be ET.
+        return datetime.now(ET)
+    return ref.astimezone(ET)
+
+
+def _window(ref: datetime) -> Tuple[datetime, datetime, datetime]:
+    """Get the start, end, and announcement time of the window around ref."""
+    for start, end, announce in WINDOWS:
+        opens, closes = _datetime(ref, *start), _datetime(ref, *end)
+        if opens <= ref < closes:
+            return opens, closes, _datetime(ref, *announce)
+    raise RuntimeError(f'No announcement window contains {ref}')
+
+
+def next_announcement_time(ref: Optional[datetime] = None) -> datetime:
+    """
+    Get the datetime of the next announcement.
+
+    Parameters
+    ----------
+    ref : datetime or None
+        Moment of reference; defaults to the current time. Naive values are
+        interpreted by :meth:`datetime.astimezone`, i.e. as system local time.
+
+    Returns
+    -------
+    datetime
+        Announcement time (ET) for submissions received at ``ref``.
+
+    """
+    return _window(_in_eastern(ref))[2]
+
+
+def next_freeze_time(ref: Optional[datetime] = None) -> datetime:
+    """
+    Get the datetime of the next freeze.
+
+    Parameters
+    ----------
+    ref : datetime or None
+        Moment of reference; defaults to the current time. Naive values are
+        interpreted by :meth:`datetime.astimezone`, i.e. as system local time.
+
+    Returns
+    -------
+    datetime
+        End (ET) of the submission window that contains ``ref``.
+
+    """
+    return _window(_in_eastern(ref))[1]
diff --git a/core/arxiv/submission/serializer.py b/core/arxiv/submission/serializer.py
new file mode 100644
index 0000000..b07228f
--- /dev/null
+++ b/core/arxiv/submission/serializer.py
@@ -0,0 +1,121 @@
+"""JSON serialization for submission core."""
+
+from typing import Any, Union, List
+import json
+from json.decoder import JSONDecodeError
+from datetime import datetime, date
+from dataclasses import asdict
+from enum import Enum
+from importlib import import_module
+from .domain import Event, event_factory, Submission, Agent, agent_factory
+
+from arxiv.util.serialize import ISO8601JSONEncoder
+from backports.datetime_fromisoformat import MonkeyPatch
+MonkeyPatch.patch_fromisoformat()
+
+
+# The base implementation of this decoder is too generous; we'll use this until
+# base gets updated.
+class ISO8601JSONDecoder(json.JSONDecoder): + """Attempts to parse ISO8601 strings as datetime objects.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Pass :func:`object_hook` to the base constructor.""" + kwargs['object_hook'] = kwargs.get('object_hook', self.object_hook) + super(ISO8601JSONDecoder, self).__init__(*args, **kwargs) + + def _try_isoparse(self, value: Any) -> Any: + """Attempt to parse a value as an ISO8601 datetime.""" + if type(value) is not str: + return value + try: + return datetime.fromisoformat(value) # type: ignore + except ValueError: + return value + + def object_hook(self, data: dict, **extra: Any) -> Any: + """Intercept and coerce ISO8601 strings to datetimes.""" + for key, value in data.items(): + if type(value) is list: + data[key] = [self._try_isoparse(v) for v in value] + else: + data[key] = self._try_isoparse(value) + return data + + +class EventJSONEncoder(ISO8601JSONEncoder): + """Encodes domain objects in this package for serialization.""" + + def default(self, obj): + """Look for domain objects, and use their dict-coercion methods.""" + if isinstance(obj, Event): + data = asdict(obj) + data['__type__'] = 'event' + elif isinstance(obj, Submission): + data = asdict(obj) + data.pop('before', None) + data.pop('after', None) + data['__type__'] = 'submission' + elif isinstance(obj, Agent): + data = asdict(obj) + data['__type__'] = 'agent' + elif isinstance(obj, type): + data = {} + data['__module__'] = obj.__module__ + data['__name__'] = obj.__name__ + data['__type__'] = 'type' + elif isinstance(obj, Enum): + data = obj.value + else: + data = super(EventJSONEncoder, self).default(obj) + return data + + +class EventJSONDecoder(ISO8601JSONDecoder): + """Decode :class:`.Event` and other domain objects from JSON data.""" + + def __init__(self, *args: Any, **kwargs: Any) -> None: + """Pass :func:`object_hook` to the base constructor.""" + kwargs['object_hook'] = kwargs.get('object_hook', self.object_hook) + 
super(EventJSONDecoder, self).__init__(*args, **kwargs) + + def object_hook(self, obj: dict, **extra: Any) -> Any: + """Decode domain objects in this package.""" + obj = super(EventJSONDecoder, self).object_hook(obj, **extra) + + if '__type__' in obj: + if obj['__type__'] == 'event': + obj.pop('__type__') + return event_factory(**obj) + elif obj['__type__'] == 'submission': + obj.pop('__type__') + return Submission(**obj) + elif obj['__type__'] == 'agent': + obj.pop('__type__') + return agent_factory(**obj) + elif obj['__type__'] == 'type': + # Supports deserialization of Event classes. + # + # This is fairly dangerous, since we are importing and calling + # an arbitrary object specified in data. We need to be sure to + # check that the object originates in this package, and that it + # is actually a child of Event. + module_name = obj['__module__'] + if not (module_name.startswith('arxiv.submission') + or module_name.startswith('submission')): + raise JSONDecodeError(module_name, '', pos=0) + cls = getattr(import_module(module_name), obj['__name__']) + if Event not in cls.mro(): + raise JSONDecodeError(obj['__name__'], '', pos=0) + return cls + return obj + + +def dumps(obj: Any) -> str: + """Generate JSON from a Python object.""" + return json.dumps(obj, cls=EventJSONEncoder) + + +def loads(data: str) -> Any: + """Load a Python object from JSON.""" + return json.loads(data, cls=EventJSONDecoder) diff --git a/core/arxiv/submission/services/__init__.py b/core/arxiv/submission/services/__init__.py new file mode 100644 index 0000000..32a648c --- /dev/null +++ b/core/arxiv/submission/services/__init__.py @@ -0,0 +1,6 @@ +"""External service integrations.""" + +from .classifier import Classifier +from .compiler import Compiler +from .plaintext import PlainTextService +from .stream import StreamPublisher diff --git a/core/arxiv/submission/services/classic/__init__.py b/core/arxiv/submission/services/classic/__init__.py new file mode 100644 index 0000000..f3ff60d --- 
/dev/null +++ b/core/arxiv/submission/services/classic/__init__.py @@ -0,0 +1,705 @@ +""" +Integration with the classic database to persist events and submission state. + +As part of the classic renewal strategy, development of new submission +interfaces must maintain data interoperability with classic components. This +service module must therefore do three main things: + +1. Store and provide access to event data generated during the submission + process, +2. Keep the classic database tables up to date so that "downstream" components + can continue to operate. +3. Patch NG submission data with state changes that occur in the classic + system. Those changes will be made directly to submission tables and not + involve event-generation. See :func:`get_submission` for details. + +Since classic components work directly on submission tables, persisting events +and resulting submission state must occur in the same transaction. We must also +verify that we are not storing events that are stale with respect to the +current state of the submission. To achieve this, the caller should use the +:func:`.util.transaction` context manager, and (when committing new events) +call :func:`.get_submission` with ``for_update=True``. This will trigger a +shared lock on the submission row(s) involved until the transaction is +committed or rolled back. + +ORM representations of the classic database tables involved in submission +are located in :mod:`.classic.models`. An additional model, :class:`.DBEvent`, +is defined in :mod:`.classic.event`. + +See also :ref:`legacy-integration`. 
+ +""" + +from typing import List, Optional, Tuple, Set, Callable, Any +from retry import retry +from datetime import datetime +from pytz import UTC +from itertools import groupby +import copy +import traceback +from functools import reduce, wraps +from operator import ior +from dataclasses import asdict + +from flask import Flask +from sqlalchemy import or_, text +from sqlalchemy.orm.exc import NoResultFound +from sqlalchemy.exc import DBAPIError, OperationalError + +from arxiv.base import logging +from arxiv.base.globals import get_application_config, get_application_global +from ...domain.event import Event, Announce, RequestWithdrawal, SetDOI, \ + SetJournalReference, SetReportNumber, Rollback, RequestCrossList, \ + ApplyRequest, RejectRequest, ApproveRequest, AddProposal, CancelRequest, \ + CreateSubmission + +from ...domain.submission import License, Submission, WithdrawalRequest, \ + CrossListClassificationRequest +from ...domain.agent import Agent, User +from .models import Base +from .exceptions import ClassicBaseException, NoSuchSubmission, \ + TransactionFailed, Unavailable, ConsistencyError +from .util import transaction, current_session, db +from .event import DBEvent +from . 
def handle_operational_errors(func):
    """
    Catch SQLAlchemy OperationalErrors and raise :class:`.Unavailable`.

    Parameters
    ----------
    func : callable
        The function to wrap.

    Returns
    -------
    callable
        Wrapper that forwards all arguments to ``func`` and returns its
        result.

    Raises
    ------
    :class:`.Unavailable`
        Raised by the wrapper, chained to the original error, when ``func``
        raises :class:`sqlalchemy.exc.OperationalError`.

    """
    @wraps(func)
    def inner(*args, **kwargs):
        try:
            return func(*args, **kwargs)
        except OperationalError as e:
            # ``traceback.print_exc()`` writes to stderr and returns None, so
            # logging its return value only records "None".
            # ``logger.exception`` logs at ERROR level and attaches the active
            # traceback to the record.
            logger.exception('Encountered an OperationalError calling %s',
                             func.__name__)
            raise Unavailable('Classic database unavailable') from e
    return inner
@retry(ClassicBaseException, tries=3, delay=1)
@handle_operational_errors
def get_submission_fast(submission_id: int) -> Optional[Submission]:
    """
    Get the projection of the submission directly.

    Instead of playing events forward, we grab the most recent snapshot of the
    submission in the database. Since classic represents the submission using
    several rows, we have to grab all of them and transform/patch as
    appropriate.

    Parameters
    ----------
    submission_id : int

    Returns
    -------
    :class:`.domain.submission.Submission` or None
        The result of :func:`.load.load` for the classic rows. Other callers
        in this module treat a ``None`` result from ``load.load`` as "no
        usable submission", so callers of this function should too.

    Raises
    ------
    :class:`.classic.exceptions.NoSuchSubmission`
        Raised when there is no submission row for the provided submission
        ID.

    """
    return load.load(_get_db_submission_rows(submission_id))
+ original_row = original_row.with_for_update(read=True) + + try: + original_row = original_row.one() + logger.debug('Got row %s', original_row) + except NoResultFound as exc: + logger.debug('Got NoResultFound exception %s', exc) + raise NoSuchSubmission(f'Submission {submission_id} not found') + # May also raise MultipleResultsFound; if so, we want to fail loudly. + + # Load any subsequent submission rows (e.g. v=2, jref, withdrawal). + # These do not have the same legacy submission ID as the original + # submission. + subsequent_rows: List[models.Submission] = [] + arxiv_id = original_row.get_arxiv_id() + if arxiv_id is not None: + subsequent_rows = session.query(models.Submission) \ + .filter(models.Submission.doc_paper_id == arxiv_id) \ + .filter(models.Submission.submission_id != submission_id) \ + .order_by(models.Submission.submission_id.asc()) + + if for_update: # Lock these rows as well. + subsequent_rows = subsequent_rows.with_for_update(read=True) + subsequent_rows = list(subsequent_rows) # Execute query. + logger.debug('Got subsequent_rows: %s', subsequent_rows) + + try: + _events = get_events(submission_id) + except NoSuchSubmission: + _events = [] + + # If this submission originated in the classic system, we will have usable + # rows from the submission table, and either no events or events that do + # not start with a CreateSubmission event. In that case, fall back to + # ``load.load()``, which relies only on classic rows. + if not _events or not isinstance(_events[0], CreateSubmission): + logger.info('Loading a classic submission: %s', submission_id) + submission = load.load([original_row] + subsequent_rows) + if submission is None: + raise NoSuchSubmission('No such submission') + return submission, [] + + # We have an NG-native submission. 
# @retry(ClassicBaseException, tries=3, delay=1)
@handle_operational_errors
def store_event(event: Event, before: Optional[Submission],
                after: Optional[Submission],
                *call: Callable) -> Tuple[Event, Submission]:
    """
    Store an event, and update submission state.

    This is where we map the NG event domain onto the classic database. The
    main differences are that:

    - In the event domain, a submission is a single stream of events, but
      in the classic system we create new rows in the submission database
      for things like replacements, adding DOIs, and withdrawing papers.
    - In the event domain, the only concept of the announced paper is the
      paper ID. In the classic submission database, we also have to worry about
      the row in the Document database.

    We assume that the submission states passed to this function have the
    correct paper ID and version number, if announced. The submission ID on
    the event and the before/after states refer to the original classic
    submission only.

    Parameters
    ----------
    event : :class:`Event`
    before : :class:`Submission` or None
        The state of the submission before the event occurred. ``None``
        means that ``after`` describes a brand-new submission.
    after : :class:`Submission`
        The state of the submission after the event occurred.
    call : list
        Items are callables that accept args ``Event, Submission, Submission``.
        These are called within the transaction context; if an exception is
        raised, the transaction is rolled back.

    Returns
    -------
    :class:`Event`
        The same event, marked as committed and carrying the original
        submission ID.
    :class:`Submission`
        The ``after`` state, carrying the original submission ID.

    Raises
    ------
    :class:`.TransactionFailed`
        Raised when the event was already committed, when the before/after
        states needed to persist it are missing, or when no persistence
        strategy matches the event.
    :class:`.NoSuchSubmission`
        Raised by the row-loading helpers when an expected classic row does
        not exist.

    """
    # Let the caller determine the transaction scope.
    session = current_session()
    if event.committed:
        # Unlike logger calls, exceptions do not interpolate %-style
        # arguments, so the message is built explicitly.
        raise TransactionFailed(f'{event.event_id} already committed')
    logger.debug('store event %s', event.event_type)

    doc_id: Optional[int] = None

    # This is the case that we have a new submission.
    if before is None and isinstance(after, Submission):
        dbs = models.Submission(type=models.Submission.NEW_SUBMISSION)
        dbs.update_from_submission(after)
        this_is_a_new_submission = True

    elif before is None or after is None:
        # Every branch below dereferences both states. Fail with a domain
        # error instead of an AttributeError on None.
        raise TransactionFailed(
            f'Cannot store {event.event_type}: missing submission state'
        )

    else:   # Otherwise we're making an update for an existing submission.
        this_is_a_new_submission = False

        # After the original submission is announced, a new Document row is
        # created. This Document is shared by all subsequent Submission rows.
        if before.is_announced:
            doc_id = _load_document_id(before.arxiv_id, before.version)

        jref_events = (SetDOI, SetJournalReference, SetReportNumber)

        # From the perspective of the database, a replacement is mainly an
        # incremented version number. This requires a new row in the
        # database.
        if after.version > before.version:
            dbs = _create_replacement(doc_id, before.arxiv_id,
                                      after.version, after, event.created)
        elif isinstance(event, Rollback) and before.version > 1:
            dbs = _delete_replacement(doc_id, before.arxiv_id,
                                      before.version)

        # Withdrawals also require a new row, and they use the most recent
        # version number.
        elif isinstance(event, RequestWithdrawal):
            dbs = _create_withdrawal(doc_id, event.reason,
                                     before.arxiv_id, after.version, after,
                                     event.created)
        elif isinstance(event, RequestCrossList):
            dbs = _create_crosslist(doc_id, event.categories,
                                    before.arxiv_id, after.version, after,
                                    event.created)

        # Adding DOIs and citation information (so-called "journal reference")
        # also requires a new row. The version number is not incremented.
        elif before.is_announced and type(event) in jref_events:
            dbs = _create_jref(doc_id, before.arxiv_id, after.version, after,
                               event.created)

        elif isinstance(event, CancelRequest):
            dbs = _cancel_request(event, before, after)

        # The submission has been announced.
        elif isinstance(before, Submission) and before.arxiv_id is not None:
            dbs = _load(paper_id=before.arxiv_id, version=before.version)
            _preserve_sticky_hold(dbs, before, after, event)
            dbs.update_from_submission(after)

        # The submission has not yet been announced; we're working with a
        # single row.
        elif isinstance(before, Submission) and before.submission_id:
            dbs = _load(before.submission_id)

            _preserve_sticky_hold(dbs, before, after, event)
            dbs.update_from_submission(after)
        else:
            raise TransactionFailed("Something is fishy")

    db_event = _new_dbevent(event)
    session.add(dbs)
    session.add(db_event)

    # Make sure that we get a submission ID; note that this does not commit
    # the transaction, just pushes the SQL that we have generated so far to
    # the database server.
    session.flush()

    log.handle(event, before, after)   # Create admin log entry.
    for func in call:
        logger.debug('call %s with event %s', func, event.event_id)
        func(event, before, after)
    if isinstance(event, AddProposal):
        proposal.add(event, before, after)

    # Attach the database object for the event to the row for the
    # submission.
    if this_is_a_new_submission:    # Update in transaction.
        db_event.submission = dbs
    else:                           # Just set the ID directly.
        db_event.submission_id = before.submission_id

    event.committed = True

    # Update the domain event and submission states with the submission ID.
    # This should carry forward the original submission ID, even if the
    # classic database has several rows for the submission (with different
    # IDs).
    if this_is_a_new_submission:
        event.submission_id = dbs.submission_id
        after.submission_id = dbs.submission_id
    else:
        event.submission_id = before.submission_id
        after.submission_id = before.submission_id
    return event, after
def _cancel_request(event, before, after):
    """
    Mark the classic row behind a user request as deleted by the user.

    Parameters
    ----------
    event : :class:`CancelRequest`
        Provides ``request_id``, used to look up the request on ``before``.
    before : :class:`Submission`
        State prior to the event; provides ``user_requests``, ``arxiv_id``
        and ``version``.
    after : :class:`Submission`
        Not used here.

    Returns
    -------
    :class:`.models.Submission`
        The withdrawal or cross-list row, with status ``USER_DELETED``.

    Raises
    ------
    :class:`.TransactionFailed`
        Raised when the request is neither a withdrawal nor a cross-list
        request, since no classic row type corresponds to it.
    :class:`.NoSuchSubmission`
        Raised by :func:`_load` when no matching row exists.

    """
    request = before.user_requests[event.request_id]
    if isinstance(request, WithdrawalRequest):
        row_type = models.Submission.WITHDRAWAL
    elif isinstance(request, CrossListClassificationRequest):
        row_type = models.Submission.CROSS_LIST
    else:
        # Without this branch ``row_type`` is unbound below and the caller
        # sees an UnboundLocalError instead of a domain error.
        raise TransactionFailed(
            f'Cannot cancel request of type {type(request).__name__}'
        )
    dbs = _load(paper_id=before.arxiv_id, version=before.version,
                row_type=row_type)
    dbs.status = models.Submission.USER_DELETED
    return dbs
def _delete_replacement(document_id: int, paper_id: str, version: int) \
        -> models.Submission:
    """
    Mark the newest replacement row for a paper version as user-deleted.

    Parameters
    ----------
    document_id : int
        Currently unused by the lookup.
    paper_id : str
        Matched against ``doc_paper_id``.
    version : int
        Matched against the row's ``version``.

    Returns
    -------
    :class:`.models.Submission`
        The replacement row with the highest submission ID, with its status
        set to ``USER_DELETED``.

    Raises
    ------
    :class:`.NoSuchSubmission`
        Raised when there is no replacement row for that paper and version.

    """
    session = current_session()
    dbs = session.query(models.Submission) \
        .filter(models.Submission.doc_paper_id == paper_id) \
        .filter(models.Submission.version == version) \
        .filter(models.Submission.type == models.Submission.REPLACEMENT) \
        .order_by(models.Submission.submission_id.desc()) \
        .first()
    if dbs is None:
        # ``first()`` returns None when nothing matches; setting ``status``
        # on it would raise an AttributeError.
        raise NoSuchSubmission(
            f'No replacement row for {paper_id}v{version}'
        )
    dbs.status = models.Submission.USER_DELETED
    return dbs
def _create_jref(document_id: int, paper_id: str, version: int,
                 submission: Submission,
                 created: datetime) -> models.Submission:
    """
    Create a JREF submission.

    Adding DOIs and citation information (so-called "journal reference") also
    requires a new row. The version number is not incremented.

    Parameters
    ----------
    document_id : int
        Set on a newly created row.
    paper_id : str
    version : int
    submission : :class:`Submission`
        State from which the row is populated.
    created : datetime
        Used for both ``created`` and ``updated`` on a newly created row.

    Returns
    -------
    :class:`.models.Submission`
        Either the existing unannounced JREF row (updated in place), or a
        new row with status ``PROCESSING_SUBMISSION``.

    """
    # Try to piggy-back on an existing JREF row. In the classic system, all
    # three fields can get updated on the same row. Only the lookup is
    # guarded, so that a NoSuchSubmission raised while updating the row is
    # not mistaken for "no existing row".
    try:
        most_recent_sb = _load(paper_id=paper_id, version=version,
                               row_type=models.Submission.JOURNAL_REFERENCE)
    except NoSuchSubmission:
        most_recent_sb = None

    if most_recent_sb is not None and not most_recent_sb.is_announced():
        most_recent_sb.update_from_submission(submission)
        return most_recent_sb

    # Otherwise, create a new JREF row.
    dbs = models.Submission(type=models.Submission.JOURNAL_REFERENCE,
                            document_id=document_id, version=version)
    dbs.update_from_submission(submission)
    dbs.created = created
    dbs.updated = created
    dbs.doc_paper_id = paper_id
    dbs.status = models.Submission.PROCESSING_SUBMISSION
    return dbs
def init_app(app: Flask) -> None:
    """
    Register the SQLAlchemy extension to an application.

    Besides initialising ``db`` for ``app``, this installs two teardown
    hooks that release the scoped session.
    """
    db.init_app(app)

    def _finish_request(exception) -> None:
        # Roll back only when the request ended with an error.
        if exception:
            db.session.rollback()
        db.session.remove()

    def _finish_appcontext(*args, **kwargs) -> None:
        # Always roll back before releasing the session.
        db.session.rollback()
        db.session.remove()

    app.teardown_request(_finish_request)
    app.teardown_appcontext(_finish_appcontext)
class DBEvent(Base):  # type: ignore
    """ORM row holding one serialized :class:`.Event`."""

    __tablename__ = 'event'

    event_id = Column(String(40), primary_key=True)
    event_type = Column(String(255))
    event_version = Column(String(20), default='0.0.0')
    proxy = Column(FriendlyJSON)
    proxy_id = index_property('proxy', 'agent_identifier')
    client = Column(FriendlyJSON)
    client_id = index_property('client', 'agent_identifier')

    creator = Column(FriendlyJSON)
    creator_id = index_property('creator', 'agent_identifier')

    created = Column(DateTime(fsp=6))
    data = Column(FriendlyJSON)
    submission_id = Column(
        ForeignKey('arXiv_submissions.submission_id'),
        index=True
    )

    submission = relationship("Submission")

    def to_event(self) -> Event:
        """
        Rebuild the domain :class:`.Event` stored in this row.

        Returns
        -------
        :class:`.Event`

        """
        # These keys are supplied from dedicated columns below, so they are
        # dropped from the stored payload.
        column_backed = ('creator', 'proxy', 'client', 'submission_id',
                         'created', 'event_type', 'event_version')
        payload = {}
        for name, value in self.data.items():
            if name in column_backed:
                continue
            payload[name] = value
        payload['committed'] = True    # Since we're loading from the DB.

        creator = agent_factory(**self.creator)
        proxy = agent_factory(**self.proxy) if self.proxy else None
        client = agent_factory(**self.client) if self.client else None
        return event_factory(
            event_version=self.event_version,
            event_type=self.event_type,
            creator=creator,
            proxy=proxy,
            client=client,
            submission_id=self.submission_id,
            created=self.get_created(),
            **payload
        )

    def get_created(self) -> datetime:
        """Return ``created`` with its tzinfo set to UTC."""
        localized = self.created.replace(tzinfo=UTC)
        return localized
+ +A core concept of the :mod:`arxiv.submission.domain.event` model is that +the state of a submission can be obtained by playing forward all of the +commands/events applied to it. That works when all agents that operate +on submission state are generating commands. The problem that we face in +the short term is that some operations will be performed by legacy components +that don't generate command/event data. + +The objective of the :class:`ClassicEventInterpolator` is to reconcile +NG events/commands with aspects of the classic database that are outside its +current purview. The logic in this module will need to change as the scope +of the NG submission data architecture expands. +""" + +from typing import List, Optional, Dict, Tuple, Any +from datetime import datetime + +from arxiv.base import logging +from arxiv import taxonomy +from . import models +from ...domain.submission import Submission, UserRequest, WithdrawalRequest, \ + CrossListClassificationRequest, Hold +from ...domain.event import Event, SetDOI, SetJournalReference, \ + SetReportNumber, ApplyRequest, RejectRequest, Announce, AddHold, \ + CancelRequest, SetPrimaryClassification, AddSecondaryClassification, \ + SetTitle, SetAbstract, SetComments, SetMSCClassification, \ + SetACMClassification, SetAuthors, Reclassify, ConfirmCompiledPreview + +from ...domain.agent import System, User +from .load import status_from_classic + + +logger = logging.getLogger(__name__) +logger.propagate = False +SYSTEM = System(__name__) + + +class ClassicEventInterpolator: + """Interleaves events with classic data to get the current state.""" + + def __init__(self, current_row: models.Submission, + subsequent_rows: List[models.Submission], + events: List[Event]) -> None: + """Interleave events with classic data to get the current state.""" + self.applied_events: List[Event] = [] + self.current_row = current_row + self.db_rows = subsequent_rows + logger.debug("start with current row: %s", self.current_row) + 
def _insert_request_event(self, rq_class: type, event_class: type) -> None:
    """
    Create and apply a request-related event.

    Parameters
    ----------
    rq_class : type
        Request class; provides ``generate_request_id``.
    event_class : type
        Event class to instantiate and apply.

    """
    logger.debug('insert request event, %s, %s',
                 rq_class.__name__, event_class.__name__)
    # NOTE(review): ``_inject_request_if_changed`` calls
    # ``generate_request_id(submission, count)``; confirm which argument
    # list matches the domain API.
    request_id = rq_class.generate_request_id(
        self.current_row.get_created(),
        rq_class.__name__,
        self.current_row.get_submitter()
    )
    # ``_inject`` expects the event *type* plus keyword data and builds the
    # event itself (creator=SYSTEM, created=<row updated time>,
    # committed=True). Handing it an already-built instance makes it try to
    # call that instance.
    self._inject(event_class, request_id=request_id)
def _should_backport(self, event: Event) -> bool:
    """
    Evaluate whether this event should be applied to the last version.

    Returns
    -------
    bool
        ``True`` only for DOI, journal reference and report number events,
        and only when the submission has at least one entry in ``versions``
        whose last item has the same version number as the submission.

    """
    if type(event) not in (SetDOI, SetJournalReference, SetReportNumber):
        return False
    versions = self.submission.versions
    if not versions:
        # A bare ``and`` chain would hand back the empty list here rather
        # than a bool.
        return False
    return bool(self.submission.version == versions[-1].version)
def _inject_secondaries_if_changed(self) -> None:
    """Inject an event for each classic secondary missing from the state."""
    for db_category in self.current_row.categories:
        # The submission is re-read on every pass because each injected
        # event replaces ``self.submission``.
        already_present = \
            db_category.category in self.submission.secondary_categories
        if already_present or db_category.is_primary:
            continue
        self._inject(AddSecondaryClassification,
                     category=taxonomy.Category(db_category.category))
+ if self.submission.metadata.title != row.title: + self._inject(SetTitle, title=row.title) + if self.submission.metadata.abstract != row.abstract: + self._inject(SetAbstract, abstract=row.abstract) + if self.submission.metadata.comments != row.comments: + self._inject(SetComments, comments=row.comments) + if self.submission.metadata.msc_class != row.msc_class: + self._inject(SetMSCClassification, msc_class=row.msc_class) + if self.submission.metadata.acm_class != row.acm_class: + self._inject(SetACMClassification, acm_class=row.acm_class) + if self.submission.metadata.authors_display != row.authors: + self._inject(SetAuthors, authors_display=row.authors) + + def _inject_jref_if_changed(self) -> None: + row = self.current_row # For readability, below. + if self.submission.metadata.doi != self.current_row.doi: + self._inject(SetDOI, doi=row.doi) + if self.submission.metadata.journal_ref != row.journal_ref: + self._inject(SetJournalReference, journal_ref=row.journal_ref) + if self.submission.metadata.report_num != row.report_num: + self._inject(SetReportNumber, report_num=row.report_num) + + def _inject_request_if_changed(self, req_type: type) -> None: + """ + Update a request on the submission, if status changed. + + We will assume that the request itself originated in the NG system, + so we will NOT create a new request. 
+ """ + request_id = req_type.generate_request_id(self.submission, + self.requests[req_type]) + if self.current_row.is_announced(): + self._inject(ApplyRequest, request_id=request_id) + elif self.current_row.is_deleted(): + self._inject(CancelRequest, request_id=request_id) + elif self.current_row.is_rejected(): + self._inject(RejectRequest, request_id=request_id) + + def _inject(self, event_type: type, **data: Dict[str, Any]) -> None: + created = self.current_row.get_updated() + logger.debug('inject %s', event_type.NAME) + self._apply(event_type(creator=SYSTEM, created=created, committed=True, + submission_id=self.submission_id, **data)) + + def _apply(self, event: Event) -> None: + self.submission = event.apply(self.submission) + self.applied_events.append(event) + + def _backport_event(self, event: Event) -> None: + logger.debug('backport event %s', event.NAME) + self.submission.versions[-1] = \ + event.apply(self.submission.versions[-1]) + + def get_submission_state(self) -> Tuple[Submission, List[Event]]: + """ + Get the current state of the :class:`Submission`. + + This is effectively memoized. + + Returns + ------- + :class:`.domain.submission.Submission` + The most recent state of the submission given the provided events + and database rows. + list + Items are :class:`.Event` instances applied to generate the + returned state. This may include events inferred and interpolated + from the classic database, not passed in the original set of + events. + + """ + for event in self.events: + # As we go, look for moments where a new row in the legacy + # submission table was created. + if self._current_row_preceeds_event(event) \ + or self._should_advance_to_next_row(event): + # If we find one, patch the domain submission from the + # preceding row, and load the next row. We want to do this + # before projecting the event, since we are inferring that the + # event occurred after a change was made via the legacy system. 
+ if self._can_inject_from_current_row(): + self._inject_from_current_row() + + if self._should_advance_to_next_row(event): + self._advance_to_next_row() + + self._apply(event) # Now project the event. + + # Backport JREFs to the announced version to which they apply. + if self._should_backport(event): + self._backport_event(event) + + # Finally, patch the submission with any remaining changes that may + # have occurred via the legacy system. + while self.current_row is not None: + if self._can_inject_from_current_row(): + self._inject_from_current_row() + self._advance_to_next_row() + logger.debug('done; submission in state %s with %i events', + self.submission.status, len(self.applied_events)) + return self.submission, self.applied_events diff --git a/core/arxiv/submission/services/classic/load.py b/core/arxiv/submission/services/classic/load.py new file mode 100644 index 0000000..d5c4a8a --- /dev/null +++ b/core/arxiv/submission/services/classic/load.py @@ -0,0 +1,228 @@ +"""Supports loading :class:`.Submission` directly from classic data.""" + +from typing import List, Optional +import copy +from itertools import groupby + +from arxiv.license import LICENSES +from arxiv.base import logging + +from ... import domain +from . import models +from .patch import patch_withdrawal, patch_jref, patch_cross, patch_hold + +logger = logging.getLogger(__name__) +logger.propagate = False + + +def load(rows: List[models.Submission]) -> Optional[domain.Submission]: + """ + Load a submission entirely from its classic database rows. + + Parameters + ---------- + rows : list + Items are :class:`.models.Submission` rows loaded from the classic + database belonging to a single arXiv e-print/submission group. + + Returns + ------- + :class:`.domain.Submission` or ``None`` + Aggregated submission object (with ``.versions``). If there is no + representation (e.g. all rows are deleted), returns ``None``. 
+ + """ + versions: List[domain.Submission] = [] + submission_id: Optional[str] = None + + # We want to work within versions, and (secondarily) in order of creation + # time. + rows = sorted(rows, key=lambda o: o.version) + logger.debug('Load from rows %s', [r.submission_id for r in rows]) + for version, version_rows in groupby(rows, key=lambda o: o.version): + # Creation time isn't all that precise in the classic database, so + # we'll use submission ID instead. + version_rows = sorted([v for v in version_rows], + key=lambda o: o.submission_id) + logger.debug('Version %s: %s', version, version_rows) + # We use the original ID to track the entire lifecycle of the + # submission in NG. + if version == 1: + submission_id = version_rows[0].submission_id + logger.debug('Submission ID: %s', submission_id) + + # Find the creation row. There may be some false starts that have been + # deleted, so we need to advance to the first non-deleted 'new' or + # 'replacement' row. + version_submission: Optional[domain.Submission] = None + while version_submission is None: + try: + row = version_rows.pop(0) + except IndexError: + break + if row.is_new_version() and \ + (row.type == row.NEW_SUBMISSION or not row.is_deleted()): + # Get the initial state of the version. + version_submission = to_submission(row, submission_id) + logger.debug('Got initial state: %s', version_submission) + + if version_submission is None: + logger.debug('Nothing to work with for this version') + continue + + # If this is not the first version, carry forward any requests. + if len(versions) > 0: + logger.debug('Bring user_requests forward from last version') + version_submission.user_requests.update(versions[-1].user_requests) + + for row in version_rows: # Remaining rows, since we popped the others. + logger.debug('Handle subsequent row: %s', row) + # We are treating JREF submissions as though there is no approval + # process; so we can just ignore deleted JREF rows. 
+ if row.is_jref() and not row.is_deleted(): + logger.debug('JREF row') + # This should update doi, journal_ref, report_num. + version_submission = patch_jref(version_submission, row) + # For withdrawals and cross-lists, we want to get data from + # deleted rows since we keep track of all requests in the NG + # submission. + elif row.is_withdrawal(): + logger.debug('Withdrawal row') + # This should update the reason_for_withdrawal (if applied), + # and add a WithdrawalRequest to user_requests. + version_submission = patch_withdrawal(version_submission, row) + elif row.is_crosslist(): + logger.debug('Crosslist row') + # This should update the secondary classifications (if applied) + # and add a CrossListClassificationRequest to user_requests. + version_submission = patch_cross(version_submission, row) + + # We want hold information represented as a Hold on the submission + # object, not just the status. + if version_submission.is_on_hold: + version_submission = patch_hold(version_submission, row) + versions.append(version_submission) + + if not versions: + return + submission = copy.deepcopy(versions[-1]) + submission.versions = [ver for ver in versions if ver and ver.is_announced] + return submission + + +def to_submission(row: models.Submission, + submission_id: Optional[int] = None) -> domain.Submission: + """ + Generate a representation of submission state from a DB instance. + + Parameters + ---------- + row : :class:`.models.Submission` + Database row representing a :class:`.domain.submission.Submission`. + submission_id : int or None + If provided the database value is overridden when setting + :attr:`domain.Submission.submission_id`. 
+ + Returns + ------- + :class:`.domain.submission.Submission` + + """ + status = status_from_classic(row.status) + primary = row.primary_classification + if row.submitter is None: + submitter = domain.User(native_id=row.submitter_id, + email=row.submitter_email) + else: + submitter = row.get_submitter() + if submission_id is None: + submission_id = row.submission_id + + license: Optional[domain.License] = None + if row.license: + label = LICENSES[row.license]['label'] + license = domain.License(uri=row.license, name=label) + + primary_clsn: Optional[domain.Classification] = None + if primary and primary.category: + _category = domain.Category(primary.category) + primary_clsn = domain.Classification(category=_category) + secondary_clsn = [ + domain.Classification(category=domain.Category(db_cat.category)) + for db_cat in row.categories if not db_cat.is_primary + ] + + content: Optional[domain.SubmissionContent] = None + if row.package: + if row.package.startswith('fm://'): + identifier, checksum = row.package.split('://', 1)[1].split('@', 1) + else: + identifier = row.package + checksum = "" + source_format = domain.SubmissionContent.Format(row.source_format) + content = domain.SubmissionContent(identifier=identifier, + compressed_size=0, + uncompressed_size=row.source_size, + checksum=checksum, + source_format=source_format) + + submission = domain.Submission( + submission_id=submission_id, + creator=submitter, + owner=submitter, + status=status, + created=row.get_created(), + updated=row.get_updated(), + source_content=content, + submitter_is_author=bool(row.is_author), + submitter_accepts_policy=bool(row.agree_policy), + submitter_contact_verified=bool(row.userinfo), + submitter_compiled_preview=not bool(row.must_process), + submitter_confirmed_preview=bool(row.viewed), + metadata=domain.SubmissionMetadata(title=row.title, + abstract=row.abstract, + comments=row.comments, + report_num=row.report_num, + doi=row.doi, + msc_class=row.msc_class, + 
acm_class=row.acm_class, + journal_ref=row.journal_ref), + license=license, + primary_classification=primary_clsn, + secondary_classification=secondary_clsn, + arxiv_id=row.doc_paper_id, + version=row.version + ) + if row.sticky_status == row.ON_HOLD or row.status == row.ON_HOLD: + submission = patch_hold(submission, row) + elif row.is_withdrawal(): + submission = patch_withdrawal(submission, row) + elif row.is_crosslist(): + submission = patch_cross(submission, row) + return submission + + +def status_from_classic(classic_status: str) -> str: + """Map classic status codes to domain submission status.""" + return STATUS_MAP.get(classic_status) + + +# Map classic status to Submission domain status. +STATUS_MAP = { + models.Submission.NOT_SUBMITTED: domain.Submission.WORKING, + models.Submission.SUBMITTED: domain.Submission.SUBMITTED, + models.Submission.ON_HOLD: domain.Submission.SUBMITTED, + models.Submission.NEXT_PUBLISH_DAY: domain.Submission.SCHEDULED, + models.Submission.PROCESSING: domain.Submission.SCHEDULED, + models.Submission.PROCESSING_SUBMISSION: domain.Submission.SCHEDULED, + models.Submission.NEEDS_EMAIL: domain.Submission.SCHEDULED, + models.Submission.ANNOUNCED: domain.Submission.ANNOUNCED, + models.Submission.DELETED_ANNOUNCED: domain.Submission.ANNOUNCED, + models.Submission.USER_DELETED: domain.Submission.DELETED, + models.Submission.DELETED_EXPIRED: domain.Submission.DELETED, + models.Submission.DELETED_ON_HOLD: domain.Submission.DELETED, + models.Submission.DELETED_PROCESSING: domain.Submission.DELETED, + models.Submission.DELETED_REMOVED: domain.Submission.DELETED, + models.Submission.DELETED_USER_EXPIRED: domain.Submission.DELETED, + models.Submission.ERROR_STATE: domain.Submission.ERROR +} diff --git a/core/arxiv/submission/services/classic/log.py b/core/arxiv/submission/services/classic/log.py new file mode 100644 index 0000000..bf00596 --- /dev/null +++ b/core/arxiv/submission/services/classic/log.py @@ -0,0 +1,135 @@ +"""Interface to the 
classic admin log.""" + +from typing import Optional, Iterable, Dict, Callable + +from . import models, util +from ...domain.event import Event, UnFinalizeSubmission, AcceptProposal, \ + AddSecondaryClassification, AddMetadataFlag, AddContentFlag +from ...domain.annotation import ClassifierResults +from ...domain.submission import Submission +from ...domain.agent import Agent, System +from ...domain.flag import MetadataFlag, ContentFlag + + +def log_unfinalize(event: UnFinalizeSubmission, before: Submission, + after: Submission) -> None: + """Create a log entry when a user pulls their submission for changes.""" + admin_log(event.creator.username, "unfinalize", + "user has pulled submission for editing", + username=event.creator.username, + hostname=event.creator.hostname, + submission_id=after.submission_id, + paper_id=after.arxiv_id) + + +def log_accept_system_cross(event: AcceptProposal, before: Submission, + after: Submission) -> None: + """Create a log entry when a system cross is accepted.""" + proposal = after.proposals[event.proposal_id] + if type(event.creator) is System: + if proposal.proposed_event_type is AddSecondaryClassification: + category = proposal.proposed_event_data["category"] + admin_log(event.creator.username, "admin comment", + f"Added {category} as secondary: {event.comment}", + username="system", + submission_id=after.submission_id, + paper_id=after.arxiv_id) + + +def log_stopwords(event: AddContentFlag, before: Submission, + after: Submission) -> None: + """Create a log entry when there is a problem with stopword content.""" + if event.flag_type is ContentFlag.Type.LOW_STOP: + admin_log(event.creator.username, "admin comment", + event.comment, + username="system", + submission_id=after.submission_id, + paper_id=after.arxiv_id) + + +def log_classifier_failed(event: AddMetadataFlag, before: Submission, + after: Submission) -> None: + """Create a log entry when the classifier returns no suggestions.""" + if type(event.annotation) is not 
ClassifierResults: + return + if not event.annotation.results: + admin_log(event.creator.username, "admin comment", + "Classifier failed to return results for submission", + username="system", + submission_id=after.submission_id, + paper_id=after.arxiv_id) + + +ON_EVENT: Dict[type, Callable[[Event, Submission, Submission], None]] = { + UnFinalizeSubmission: [log_unfinalize], + AcceptProposal: [log_accept_system_cross], + AddContentFlag: [log_stopwords] + +} +"""Logging functions to call when an event is comitted.""" + + +def handle(event: Event, before: Submission, after: Submission) -> None: + """ + Generate an admin log entry for an event that is being committed. + + Looks for a logging function in :const:`.ON_EVENT` and, if found, calls it + with the passed parameters. + + Parameters + ---------- + event : :class:`event.Event` + The event being committed. + before : :class:`.domain.submission.Submission` + State of the submission before the event. + after : :class:`.domain.submission.Submission` + State of the submission after the event. + + """ + if type(event) in ON_EVENT: + for callback in ON_EVENT[type(event)]: + callback(event, before, after) + + +def admin_log(program: str, command: str, text: str, notify: bool = False, + username: Optional[str] = None, + hostname: Optional[str] = None, + submission_id: Optional[int] = None, + paper_id: Optional[str] = None, + document_id: Optional[int] = None) -> models.AdminLogEntry: + """ + Add an entry to the admin log. + + Parameters + ---------- + program : str + Name of the application generating the log entry. + command : str + Name of the command generating the log entry. + text : str + Content of the admin log entry. + notify : bool + username : str + hostname : str + Hostname or IP address of the client. 
+ submission_id : int + paper_id : str + document_id : int + + """ + if paper_id is None and submission_id is not None: + paper_id = f'submit/{submission_id}' + with util.transaction() as session: + entry = models.AdminLogEntry( + paper_id=paper_id, + username=username, + host=hostname, + program=program, + command=command, + logtext=text, + document_id=document_id, + submission_id=submission_id, + notify=notify + ) + session.add(entry) + return entry diff --git a/core/events/services/classic/models.py b/core/arxiv/submission/services/classic/models.py similarity index 57% rename from core/events/services/classic/models.py rename to core/arxiv/submission/services/classic/models.py index 843930c..28f47d5 100644 --- a/core/events/services/classic/models.py +++ b/core/arxiv/submission/services/classic/models.py @@ -1,18 +1,25 @@ """SQLAlchemy ORM classes for the classic database.""" import json -from typing import Optional +from typing import Optional, List, Any from datetime import datetime +from pytz import UTC from sqlalchemy import Column, Date, DateTime, Enum, ForeignKey, Text, text, \ ForeignKeyConstraint, Index, Integer, SmallInteger, String, Table -from sqlalchemy.orm import relationship, joinedload - +from sqlalchemy.orm import relationship, joinedload, backref from sqlalchemy.ext.declarative import declarative_base -from events import domain +from arxiv.base import logging +from arxiv.license import LICENSES +from arxiv import taxonomy + +from ... 
import domain +from .util import transaction Base = declarative_base() +logger = logging.getLogger(__name__) + class Submission(Base): # type: ignore """Represents an arXiv submission.""" @@ -39,14 +46,14 @@ class Submission(Base): # type: ignore PROCESSING = 5 """Scheduled for today.""" NEEDS_EMAIL = 6 - """Published, not yet announced.""" + """Announced, not yet announced.""" - PUBLISHED = 7 - DELETED_PUBLISHED = 27 - """Published and files expired.""" + ANNOUNCED = 7 + DELETED_ANNOUNCED = 27 + """Announced and files expired.""" PROCESSING_SUBMISSION = 8 - REMOVED = 9 + REMOVED = 9 # This is "rejected". USER_DELETED = 10 ERROR_STATE = 19 @@ -58,41 +65,21 @@ class Submission(Base): # type: ignore DELETED_PROCESSING = 25 DELETED_REMOVED = 29 - DELETED_USER = 30 + DELETED_USER_EXPIRED = 30 """User deleted and files expired.""" - DELETED = [ + DELETED = ( USER_DELETED, DELETED_ON_HOLD, DELETED_PROCESSING, - DELETED_REMOVED, DELETED_USER - ] + DELETED_REMOVED, DELETED_USER_EXPIRED, DELETED_EXPIRED + ) - NEW_SUBMSSION = 'new' + NEW_SUBMISSION = 'new' REPLACEMENT = 'rep' JOURNAL_REFERENCE = 'jref' - WITHDRAWAL = 'dr' - + WITHDRAWAL = 'wdr' + CROSS_LIST = 'cross' WITHDRAWN_FORMAT = 'withdrawn' - # Map classic status to Submission domain status. 
- STATUS_MAP = { - NOT_SUBMITTED: domain.Submission.WORKING, - SUBMITTED: domain.Submission.SUBMITTED, - ON_HOLD: domain.Submission.ON_HOLD, - NEXT_PUBLISH_DAY: domain.Submission.SCHEDULED, - PROCESSING: domain.Submission.SCHEDULED, - PROCESSING_SUBMISSION: domain.Submission.SCHEDULED, - NEEDS_EMAIL: domain.Submission.SCHEDULED, - PUBLISHED: domain.Submission.PUBLISHED, - DELETED_PUBLISHED: domain.Submission.PUBLISHED, - USER_DELETED: domain.Submission.DELETED, - DELETED_EXPIRED: domain.Submission.DELETED, - DELETED_ON_HOLD: domain.Submission.DELETED, - DELETED_PROCESSING: domain.Submission.DELETED, - DELETED_REMOVED: domain.Submission.DELETED, - DELETED_USER: domain.Submission.DELETED, - ERROR_STATE: domain.Submission.ERROR - } - submission_id = Column(Integer, primary_key=True) type = Column(String(8), index=True) @@ -119,8 +106,8 @@ class Submission(Base): # type: ignore ) submitter_name = Column(String(64)) submitter_email = Column(String(64)) - created = Column(DateTime) - updated = Column(DateTime) + created = Column(DateTime, default=lambda: datetime.now(UTC)) + updated = Column(DateTime, onupdate=lambda: datetime.now(UTC)) status = Column(Integer, nullable=False, index=True, server_default=text("'0'")) sticky_status = Column(Integer) @@ -180,120 +167,67 @@ class Submission(Base): # type: ignore submitter = relationship('User') sword = relationship('Tracking') categories = relationship('SubmissionCategory', - back_populates="submission", lazy='joined') - - def patch(self, submission: domain.Submission) -> domain.Submission: - """ - Patch a :class:`.Submission` with data outside the event scope. - - There are several fields that may change after a submission enters the - classic moderation and publication system, that cannot be inferred - from the event stack. - - Parameters - ---------- - submission : :class:`.domain.Submission` - The submission object to patch. 
- - Returns - ------- - :class:`.domain.Submission` - The same submission that was passed; now patched with data outside - the scope of the event model. - - """ - # Status changes. - submission.status = self._get_status() - submission.active = (submission.status not in - [submission.DELETED, submission.PUBLISHED]) - submission.published = (submission.status == submission.PUBLISHED) - submission.arxiv_id = self._get_arxiv_id() - - # Possible reclassification. - primary = self.primary_classification - if primary: - submission.primary_classification = domain.Classification( - category=primary.category - ) - submission.secondary_classification = [ - domain.Classification(category=db_cat.category) - for db_cat in self.categories - if db_cat.is_primary == 0 - ] + back_populates='submission', lazy='joined', + cascade="all, delete-orphan") - # Comments (admins may modify). - submission.metadata.comments = self.comments - - # Apply sticky status. - if submission.finalized and self.sticky_status is self.ON_HOLD: - submission.status = submission.ON_HOLD - return submission - - def to_submission(self) -> domain.Submission: - """ - Generate a representation of submission state from a DB instance. 
- - Returns - ------- - :class:`.domain.Submission` - - """ - status = self._get_status() - primary = self.primary_classification - submitter = domain.User( - native_id=self.submitter.user_id, - email=self.submitter.email, - forename=self.submitter.first_name, - surname=self.submitter.last_name, - suffix=self.submitter.suffix_name - ) - return domain.Submission( - creator=submitter, - owner=submitter, - created=self.created, - updated=self.updated, - submitter_is_author=bool(self.is_author), - submitter_accepts_policy=bool(self.agree_policy), - submitter_contact_verified=bool(self.userinfo), - status=status, - finalized=(status != domain.Submission.WORKING), - active=(status not in [domain.Submission.DELETED, - domain.Submission.PUBLISHED]), - published=(status == domain.Submission.PUBLISHED), - metadata=domain.SubmissionMetadata( - title=self.title, - abstract=self.abstract, - comments=self.comments, - report_num=self.report_num, - doi=self.doi, - msc_class=self.msc_class, - acm_class=self.acm_class, - journal_ref=self.journal_ref - ), - license=domain.License( - uri=self.arXiv_license.name, - name=self.arXiv_license.label - ) if self.arXiv_license else None, - primary_classification=domain.Classification( - category=primary.category - ) if primary else None, - secondary_classification=[ - domain.Classification(category=db_cat.category) - for db_cat in self.categories - if db_cat.is_primary == 0 - ] - ) + def get_submitter(self) -> domain.User: + """Generate a :class:`.User` representing the submitter.""" + extra = {} + if self.submitter: + extra.update(dict(forename=self.submitter.first_name, + surname=self.submitter.last_name, + suffix=self.submitter.suffix_name)) + return domain.User(native_id=self.submitter_id, + email=self.submitter_email, **extra) + + + WDR_DELIMETER = '. 
Withdrawn: ' + + def get_withdrawal_reason(self) -> Optional[str]: + """Extract the withdrawal reason from the comments field.""" + if Submission.WDR_DELIMETER not in self.comments: + return + return self.comments.split(Submission.WDR_DELIMETER, 1)[1] + + def update_withdrawal(self, submission: domain.Submission, reason: str, + paper_id: str, version: int, + created: datetime) -> None: + """Update withdrawal request information in the database.""" + self.update_from_submission(submission) + self.created = created + self.updated = created + self.doc_paper_id = paper_id + self.status = Submission.PROCESSING_SUBMISSION + reason = f"{Submission.WDR_DELIMETER}{reason}" + self.comments = self.comments.rstrip('. ') + reason + + def update_cross(self, submission: domain.Submission, + categories: List[str], paper_id: str, version: int, + created: datetime) -> None: + """Update cross-list request information in the database.""" + self.update_from_submission(submission) + self.created = created + self.updated = created + self.doc_paper_id = paper_id + self.status = Submission.PROCESSING_SUBMISSION + for category in categories: + self.categories.append( + SubmissionCategory(submission_id=self.submission_id, + category=category, is_primary=0)) def update_from_submission(self, submission: domain.Submission) -> None: - """Update this database object from a :class:`.domain.Submission`.""" + """Update this database object from a :class:`.domain.submission.Submission`.""" + if self.is_announced(): # Avoid doing anything. to be safe. 
+ return + self.submitter_id = submission.creator.native_id self.submitter_name = submission.creator.name self.submitter_email = submission.creator.email - self.is_author = int(submission.submitter_is_author) - self.agree_policy = int(submission.submitter_accepts_policy) - self.userinfo = int(submission.submitter_contact_verified) - self.created = submission.created - self.updated = datetime.now() + self.is_author = 1 if submission.submitter_is_author else 0 + self.agree_policy = 1 if submission.submitter_accepts_policy else 0 + self.userinfo = 1 if submission.submitter_contact_verified else 0 + self.viewed = 1 if submission.submitter_confirmed_preview else 0 + self.updated = submission.updated self.title = submission.metadata.title self.abstract = submission.metadata.abstract self.authors = submission.metadata.authors_display @@ -303,30 +237,59 @@ def update_from_submission(self, submission: domain.Submission) -> None: self.msc_class = submission.metadata.msc_class self.acm_class = submission.metadata.acm_class self.journal_ref = submission.metadata.journal_ref + + self.version = submission.version # Numeric version. + self.doc_paper_id = submission.arxiv_id # arXiv canonical ID. + + # The document ID is a legacy concept, and not replicated in the NG + # data model. So we need to grab it from the arXiv_documents table + # using the doc_paper_id. + if self.doc_paper_id and not self.document_id: + doc = _load_document(paper_id=self.doc_paper_id) + self.document_id = doc.document_id + if submission.license: self.license = submission.license.uri - self.type = Submission.NEW # We're not handling other types here. 
if submission.source_content is not None: + self.source_size = submission.source_content.uncompressed_size + if submission.source_content.source_format is not None: + self.source_format = \ + submission.source_content.source_format.value + else: + self.source_format = None + self.package = (f'fm://{submission.source_content.identifier}' + f'@{submission.source_content.checksum}') + + if submission.submitter_compiled_preview: self.must_process = 0 - self.source_size = submission.source_content.size - self.source_format = submission.source_content.format + else: + self.must_process = 1 # Not submitted -> Submitted. - if submission.finalized and self.status is Submission.NOT_SUBMITTED: + if submission.is_finalized \ + and self.status in [Submission.NOT_SUBMITTED, None]: self.status = Submission.SUBMITTED self.submit_time = submission.updated + # Delete. + elif submission.is_deleted: + self.status = Submission.USER_DELETED + elif submission.is_on_hold: + self.status = Submission.ON_HOLD # Unsubmit. elif self.status is None or self.status <= Submission.ON_HOLD: - if not submission.finalized: + if not submission.is_finalized: self.status = Submission.NOT_SUBMITTED if submission.primary_classification: self._update_primary(submission) self._update_secondaries(submission) - self._update_submitter(submission) + # We only want to set the creation datetime on the initial row. 
+ if self.version == 1 and self.type == Submission.NEW_SUBMISSION: + self.created = submission.created + @property def primary_classification(self): """Get the primary classification for this submission.""" @@ -338,23 +301,66 @@ def primary_classification(self): except IndexError: return - def _get_arxiv_id(self) -> Optional[str]: + def get_arxiv_id(self) -> Optional[str]: + """Get the arXiv identifier for this submission.""" if not self.document: return return self.document.paper_id - def _get_status(self) -> str: - """Map classic status codes to :class:`.domain.Submission` status.""" - if self._get_arxiv_id() is not None: - return domain.Submission.PUBLISHED - return self.STATUS_MAP.get(self.status) + def get_created(self) -> datetime: + """Get the UTC-localized creation datetime.""" + return self.created.replace(tzinfo=UTC) + + def get_updated(self) -> datetime: + """Get the UTC-localized updated datetime.""" + return self.updated.replace(tzinfo=UTC) + + def is_working(self) -> bool: + return self.status == self.NOT_SUBMITTED + + def is_announced(self) -> bool: + return self.status in [self.ANNOUNCED, self.DELETED_ANNOUNCED] + + def is_active(self) -> bool: + return not self.is_announced() and not self.is_deleted() + + def is_rejected(self) -> bool: + return self.status == self.REMOVED + + def is_finalized(self) -> bool: + return self.status > self.WORKING and not self.is_deleted() + + def is_deleted(self) -> bool: + return self.status in self.DELETED + + def is_on_hold(self) -> bool: + return self.status == self.ON_HOLD + + def is_new_version(self) -> bool: + """Indicate whether this row represents a new version.""" + return self.type in [self.NEW_SUBMISSION, self.REPLACEMENT] + + def is_withdrawal(self) -> bool: + return self.type == self.WITHDRAWAL + + def is_crosslist(self) -> bool: + return self.type == self.CROSS_LIST + + def is_jref(self) -> bool: + return self.type == self.JOURNAL_REFERENCE + + @property + def secondary_categories(self) -> List[str]: + 
"""Category names from this submission's secondary classifications.""" + return [c.category for c in self.categories if c.is_primary == 0] def _update_submitter(self, submission: domain.Submission) -> None: - """Update submitter information.""" + """Update submitter information on this row.""" self.submitter_id = submission.creator.native_id + self.submitter_email = submission.creator.email def _update_primary(self, submission: domain.Submission) -> None: - """Update primary classification.""" + """Update primary classification on this row.""" primary_category = submission.primary_classification.category cur_primary = self.primary_classification @@ -374,24 +380,17 @@ def _update_primary(self, submission: domain.Submission) -> None: ) def _update_secondaries(self, submission: domain.Submission) -> None: - """Update secondary classifications.""" - cur_secondaries = [ - db_cat.category for db_cat - in self.categories if db_cat.is_primary == 0 - ] - tgt_secondaries = [ - cat.category for cat in submission.secondary_classification - ] + """Update secondary classifications on this row.""" # Remove any categories that have been removed from the Submission. for db_cat in self.categories: if db_cat.is_primary == 1: continue - if db_cat.category not in tgt_secondaries: + if db_cat.category not in submission.secondary_categories: self.categories.remove(db_cat) # Add any new secondaries for cat in submission.secondary_classification: - if cat.category not in cur_secondaries: + if cat.category not in self.secondary_categories: self.categories.append( SubmissionCategory( submission_id=self.submission_id, @@ -458,10 +457,10 @@ class SubmissionCategory(Base): # type: ignore class Document(Base): # type: ignore """ - Represents a published arXiv paper. + Represents an announced arXiv paper. This is here so that we can look up the arXiv ID after a submission is - published. + announced. 
""" __tablename__ = 'arXiv_documents' @@ -486,9 +485,14 @@ class Document(Base): # type: ignore submitter_id = Column(ForeignKey('tapir_users.user_id'), index=True) submitter = relationship('User') + @property + def dated_datetime(self) -> datetime: + """Return the created time as a datetime.""" + return datetime.utcfromtimestamp(self.dated).replace(tzinfo=UTC) + class DocumentCategory(Base): # type: ignore - """Relation between published arXiv papers and their classifications.""" + """Relation between announced arXiv papers and their classifications.""" __tablename__ = 'arXiv_document_category' @@ -573,6 +577,50 @@ class User(Base): # type: ignore tapir_policy_class = relationship('PolicyClass') + def to_user(self) -> domain.agent.User: + return domain.agent.User( + self.user_id, + self.email, + username=self.username, + forename=self.first_name, + surname=self.last_name, + suffix=self.suffix_name + ) + + +class Username(Base): # type: ignore + """ + Users' usernames (because why not have a separate table). 
+ + +--------------+------------------+------+-----+---------+----------------+ + | Field | Type | Null | Key | Default | Extra | + +--------------+------------------+------+-----+---------+----------------+ + | nick_id | int(10) unsigned | NO | PRI | NULL | autoincrement | + | nickname | varchar(20) | NO | UNI | | | + | user_id | int(4) unsigned | NO | MUL | 0 | | + | user_seq | int(1) unsigned | NO | | 0 | | + | flag_valid | int(1) unsigned | NO | MUL | 0 | | + | role | int(10) unsigned | NO | MUL | 0 | | + | policy | int(10) unsigned | NO | MUL | 0 | | + | flag_primary | int(1) unsigned | NO | | 0 | | + +--------------+------------------+------+-----+---------+----------------+ + """ + + __tablename__ = 'tapir_nicknames' + + nick_id = Column(Integer, primary_key=True) + nickname = Column(String(20), nullable=False, unique=True, index=True) + user_id = Column(ForeignKey('tapir_users.user_id'), nullable=False, + server_default=text("'0'")) + user = relationship('User') + user_seq = Column(Integer, nullable=False, server_default=text("'0'")) + flag_valid = Column(Integer, nullable=False, server_default=text("'0'")) + role = Column(Integer, nullable=False, server_default=text("'0'")) + policy = Column(Integer, nullable=False, server_default=text("'0'")) + flag_primary = Column(Integer, nullable=False, server_default=text("'0'")) + + user = relationship('User') + # TODO: what is this? 
class PolicyClass(Base): # type: ignore @@ -733,3 +781,122 @@ class Category(Base): # type: ignore papers_to_endorse = Column(SmallInteger, nullable=False, server_default=text("'0'")) + + +class AdminLogEntry(Base): # type: ignore + """ + + +---------------+-----------------------+------+-----+-------------------+ + | Field | Type | Null | Key | Default | + +---------------+-----------------------+------+-----+-------------------+ + | id | int(11) | NO | PRI | NULL | + | logtime | varchar(24) | YES | | NULL | + | created | timestamp | NO | | CURRENT_TIMESTAMP | + | paper_id | varchar(20) | YES | MUL | NULL | + | username | varchar(20) | YES | | NULL | + | host | varchar(64) | YES | | NULL | + | program | varchar(20) | YES | | NULL | + | command | varchar(20) | YES | MUL | NULL | + | logtext | text | YES | | NULL | + | document_id | mediumint(8) unsigned | YES | | NULL | + | submission_id | int(11) | YES | MUL | NULL | + | notify | tinyint(1) | YES | | 0 | + +---------------+-----------------------+------+-----+-------------------+ + """ + + __tablename__ = 'arXiv_admin_log' + + id = Column(Integer, primary_key=True) + logtime = Column(String(24), nullable=True) + created = Column(DateTime, default=lambda: datetime.now(UTC)) + paper_id = Column(String(20), nullable=True) + username = Column(String(20), nullable=True) + host = Column(String(64), nullable=True) + program = Column(String(20), nullable=True) + command = Column(String(20), nullable=True) + logtext = Column(Text, nullable=True) + document_id = Column(Integer, nullable=True) + submission_id = Column(Integer, nullable=True) + notify = Column(Integer, nullable=True, default=0) + + +class CategoryProposal(Base): # type: ignore + """ + Represents a proposal to change the classification of a submission. 
+ + +---------------------+-----------------+------+-----+---------+ + | Field | Type | Null | Key | Default | + +---------------------+-----------------+------+-----+---------+ + | proposal_id | int(11) | NO | PRI | NULL | + | submission_id | int(11) | NO | PRI | NULL | + | category | varchar(32) | NO | PRI | NULL | + | is_primary | tinyint(1) | NO | PRI | 0 | + | proposal_status | int(11) | YES | | 0 | + | user_id | int(4) unsigned | NO | MUL | NULL | + | updated | datetime | YES | | NULL | + | proposal_comment_id | int(11) | YES | MUL | NULL | + | response_comment_id | int(11) | YES | MUL | NULL | + +---------------------+-----------------+------+-----+---------+ + """ + + __tablename__ = 'arXiv_submission_category_proposal' + + UNRESOLVED = 0 + ACCEPTED_AS_PRIMARY = 1 + ACCEPTED_AS_SECONDARY = 2 + REJECTED = 3 + DOMAIN_STATUS = { + UNRESOLVED: domain.proposal.Proposal.Status.PENDING, + ACCEPTED_AS_PRIMARY: domain.proposal.Proposal.Status.ACCEPTED, + ACCEPTED_AS_SECONDARY: domain.proposal.Proposal.Status.ACCEPTED, + REJECTED: domain.proposal.Proposal.Status.REJECTED + } + + proposal_id = Column(Integer, primary_key=True) + submission_id = Column(ForeignKey('arXiv_submissions.submission_id')) + submission = relationship('Submission') + category = Column(String(32)) + is_primary = Column(Integer, server_default=text("'0'")) + proposal_status = Column(Integer, nullable=True, server_default=text("'0'")) + user_id = Column(ForeignKey('tapir_users.user_id')) + user = relationship("User") + updated = Column(DateTime, default=lambda: datetime.now(UTC)) + proposal_comment_id = Column(ForeignKey('arXiv_admin_log.id'), + nullable=True) + proposal_comment = relationship("AdminLogEntry", + foreign_keys=[proposal_comment_id]) + response_comment_id = Column(ForeignKey('arXiv_admin_log.id'), + nullable=True) + response_comment = relationship("AdminLogEntry", + foreign_keys=[response_comment_id]) + + def status_from_domain(self, proposal: domain.proposal.Proposal) -> int: + if 
proposal.status == domain.proposal.Proposal.Status.PENDING: + return self.UNRESOLVED + elif proposal.status == domain.proposal.Proposal.Status.REJECTED: + return self.REJECTED + elif proposal.status == domain.proposal.Proposal.Status.ACCEPTED: + if proposal.proposed_event_type \ + is domain.event.SetPrimaryClassification: + return self.ACCEPTED_AS_PRIMARY + else: + return self.ACCEPTED_AS_SECONDARY + + + +def _load_document(paper_id: str) -> Document: + with transaction() as session: + document = session.query(Document) \ + .filter(Document.paper_id == paper_id) \ + .one() + if document is None: + raise RuntimeError('No such document') + return document + + +def _get_user_by_username(username: str) -> User: + with transaction() as session: + return (session.query(Username) + .filter(Username.nickname == username) + .first() + .user) diff --git a/core/arxiv/submission/services/classic/patch.py b/core/arxiv/submission/services/classic/patch.py new file mode 100644 index 0000000..b24985b --- /dev/null +++ b/core/arxiv/submission/services/classic/patch.py @@ -0,0 +1,120 @@ +"""Methods for updating :class:`.Submission` with state outside event scope.""" + +from typing import List, Dict, Any +from ... import domain +from . 
import models + + +def patch_hold(submission: domain.Submission, + row: models.Submission) -> domain.Submission: + """Patch hold-related data from this database row.""" + if not row.is_new_version(): + raise ValueError('Only applies to new and replacement rows') + + if row.status == row.ON_HOLD: + created = row.get_updated() + creator = domain.agent.System(__name__) + event_id = domain.Event.get_id(created, 'AddHold', creator) + hold = domain.Hold(event_id=event_id, creator=creator, + created=created, + hold_type=domain.Hold.Type.PATCH) + submission.holds[event_id] = hold + return submission + + +def patch_jref(submission: domain.Submission, + row: models.Submission) -> domain.Submission: + """ + Patch a :class:`.domain.submission.Submission` with JREF data outside the event scope. + + Parameters + ---------- + submission : :class:`.domain.submission.Submission` + The submission object to patch. + + Returns + ------- + :class:`.domain.submission.Submission` + The same submission that was passed; now patched with JREF data + outside the scope of the event model. + + """ + submission.metadata.doi = row.doi + submission.metadata.journal_ref = row.journal_ref + submission.metadata.report_num = row.report_num + return submission + + +# This should update the reason_for_withdrawal (if applied), +# and add a WithdrawalRequest to user_requests. 
+def patch_withdrawal(submission: domain.Submission, row: models.Submission, + request_number: int = -1) -> domain.Submission: + req_type = domain.WithdrawalRequest + data = {'reason_for_withdrawal': row.get_withdrawal_reason()} + return _patch_request(req_type, data, submission, row, request_number) + + +def patch_cross(submission: domain.Submission, row: models.Submission, + request_number: int = -1) -> domain.Submission: + req_type = domain.CrossListClassificationRequest + clsns = [domain.Classification(dbc.category) for dbc in row.categories + if not dbc.is_primary + and dbc.category not in submission.secondary_categories] + data = {'classifications': clsns} + return _patch_request(req_type, data, submission, row, request_number) + + +def _patch_request(req_type: type, data: Dict[str, Any], + submission: domain.Submission, row: models.Submission, + request_number: int = -1) -> domain.Submission: + status = req_type.WORKING + if row.is_announced(): + status = req_type.APPLIED + elif row.is_deleted(): + status = req_type.CANCELLED + elif row.is_rejected(): + status = req_type.REJECTED + elif not row.is_working(): + status = req_type.PENDING # Includes hold state. 
+ data.update({'status': status}) + request_id = req_type.generate_request_id(submission, request_number) + + if request_number < 0: + creator = domain.User(native_id=row.submitter_id, + email=row.submitter_email) + user_request = req_type(creator=creator, created=row.get_created(), + updated=row.get_updated(), + request_id=request_id, **data) + else: + user_request = submission.user_requests[request_id] + if any([setattr_changed(user_request, field, value) + for field, value in data.items()]): + user_request.updated = row.get_updated() + submission.user_requests[request_id] = user_request + + if status == req_type.APPLIED: + submission = user_request.apply(submission) + return submission + + +def setattr_changed(obj: Any, field: str, value: Any) -> bool: + """ + Set an attribute on an object only if the value does not match provided. + + Parameters + ---------- + obj : object + field : str + The name of the attribute on ``obj`` to set. + value : object + + Returns + ------- + bool + True if the attribute was set; otherwise False. + + """ + if getattr(obj, field) != value: + setattr(obj, field, value) + return True + return False diff --git a/core/arxiv/submission/services/classic/proposal.py b/core/arxiv/submission/services/classic/proposal.py new file mode 100644 index 0000000..4f2b89d --- /dev/null +++ b/core/arxiv/submission/services/classic/proposal.py @@ -0,0 +1,64 @@ +"""Integration with classic proposals.""" + +from sqlalchemy.orm.exc import NoResultFound, MultipleResultsFound + +from . import models, util, log +from ... import domain +from ...domain.event import Event, SetPrimaryClassification, \ + AddSecondaryClassification, AddProposal +from ...domain.submission import Submission + + +def add(event: AddProposal, before: Submission, after: Submission) -> None: + """ + Add a category proposal to the database. + + The objective here is simply to create a new proposal entry in the classic + database when an :class:`domain.event.AddProposal` event is stored. 
+ + Parameters + ---------- + event : :class:`event.Event` + The event being committed. + before : :class:`.domain.submission.Submission` + State of the submission before the event. + after : :class:`.domain.submission.Submission` + State of the submission after the event. + + """ + supported = [SetPrimaryClassification, AddSecondaryClassification] + if event.proposed_event_type not in supported: + return + + category = event.proposed_event_data['category'] + is_primary = event.proposed_event_type is SetPrimaryClassification + with util.transaction() as session: + try: + existing_proposal = session.query(models.CategoryProposal) \ + .filter(models.CategoryProposal.submission_id == after.submission_id) \ + .filter(models.CategoryProposal.category == category) \ + .one() + return # Proposal already exists. + except MultipleResultsFound: + return # Proposal already exists (in spades!). + except NoResultFound: + pass + comment = None + if event.comment: + comment = log.admin_log(event.creator.username, 'admin comment', + event.comment, + username=event.creator.username, + hostname=event.creator.hostname, + submission_id=after.submission_id) + + session.add( + models.CategoryProposal( + submission_id=after.submission_id, + category=category, + is_primary=int(is_primary), + user_id=event.creator.native_id, + updated=event.created, + proposal_status=models.CategoryProposal.UNRESOLVED, + proposal_comment=comment + ) + ) diff --git a/core/arxiv/submission/services/classic/tests/__init__.py b/core/arxiv/submission/services/classic/tests/__init__.py new file mode 100644 index 0000000..61a51b9 --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/__init__.py @@ -0,0 +1,11 @@ +""" +Integration tests for the classic database service. + +These tests assume that SQLAlchemy's MySQL backend is implemented correctly: +instead of using a live MySQL database, they use an in-memory SQLite database. +This is mostly fine (they are intended to be more-or-less swappable). 
The one +iffy bit is the JSON datatype, which is not available by default in the SQLite +backend. We extend the SQLite engine with a JSON type in +:mod:`arxiv.submission.services.classic.util`. End to end tests with a live +MySQL database will provide more confidence in this area. +""" diff --git a/core/arxiv/submission/services/classic/tests/test_admin_log.py b/core/arxiv/submission/services/classic/tests/test_admin_log.py new file mode 100644 index 0000000..f924d9f --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/test_admin_log.py @@ -0,0 +1,97 @@ +"""Tests for admin log integration.""" + +from unittest import TestCase, mock +import os +from datetime import datetime +from contextlib import contextmanager +import json +from pytz import UTC + +from flask import Flask + +from ....domain.agent import User, System +from ....domain.submission import Submission, Author +from ....domain.event import CreateSubmission, ConfirmPolicy, SetTitle +from .. import models, store_event, log, current_session + +from .util import in_memory_db + + +class TestAdminLog(TestCase): + """Test adding an admin log entry with :func:`.log.admin_log`.""" + + def test_add_admin_log_entry(self): + """Add a log entry.""" + with in_memory_db(): + log.admin_log( + "fooprogram", + "test", + "this is a test of the admin log", + username="foouser", + hostname="127.0.0.1", + submission_id=5 + ) + + session = current_session() + logs = session.query(models.AdminLogEntry).all() + self.assertEqual(len(logs), 1) + self.assertEqual(logs[0].program, "fooprogram") + self.assertEqual(logs[0].command, "test") + self.assertEqual(logs[0].logtext, + "this is a test of the admin log") + self.assertEqual(logs[0].username, "foouser") + self.assertEqual(logs[0].host, "127.0.0.1") + self.assertEqual(logs[0].submission_id, 5) + self.assertEqual(logs[0].paper_id, "submit/5") + self.assertFalse(logs[0].notify) + self.assertIsNone(logs[0].document_id) + + +class TestOnEvent(TestCase): + """Functions in
:const:`.log.ON_EVENT` are called.""" + + def test_on_event(self): + """Function in :const:`.log.ON_EVENT` is called.""" + mock_handler = mock.MagicMock() + log.ON_EVENT[ConfirmPolicy] = [mock_handler] + user = User(12345, 'joe@joe.joe', username="joeuser", + endorsements=['physics.soc-ph', 'cs.DL']) + event = ConfirmPolicy(creator=user) + before = Submission(creator=user, owner=user, submission_id=42) + after = Submission(creator=user, owner=user, submission_id=42) + log.handle(event, before, after) + self.assertEqual(mock_handler.call_count, 1, + "Handler registered for ConfirmPolicy is called") + + def test_on_event_is_specific(self): + """Functions in :const:`.log.ON_EVENT` are specific.""" + mock_handler = mock.MagicMock() + log.ON_EVENT[ConfirmPolicy] = [mock_handler] + user = User(12345, 'joe@joe.joe', username="joeuser", + endorsements=['physics.soc-ph', 'cs.DL']) + event = SetTitle(creator=user, title="foo title") + before = Submission(creator=user, owner=user, submission_id=42) + after = Submission(creator=user, owner=user, submission_id=42) + log.handle(event, before, after) + self.assertEqual(mock_handler.call_count, 0, + "Handler registered for ConfirmPolicy is not called") + + +class TestStoreEvent(TestCase): + """Test log integration when storing event.""" + + def test_store_event(self): + """Log handler is called when an event is stored.""" + mock_handler = mock.MagicMock() + log.ON_EVENT[CreateSubmission] = [mock_handler] + user = User(12345, 'joe@joe.joe', username="joeuser", + endorsements=['physics.soc-ph', 'cs.DL']) + event = CreateSubmission(creator=user, created=datetime.now(UTC)) + before = None + after = Submission(creator=user, owner=user, submission_id=42) + + with in_memory_db(): + store_event(event, before, after) + + self.assertEqual(mock_handler.call_count, 1, + "Handler registered for CreateSubmission is called") diff --git a/core/arxiv/submission/services/classic/tests/test_get_licenses.py
b/core/arxiv/submission/services/classic/tests/test_get_licenses.py new file mode 100644 index 0000000..4814e2f --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/test_get_licenses.py @@ -0,0 +1,45 @@ +"""Tests for retrieving license information.""" + +from unittest import TestCase, mock + +from flask import Flask + +from ....domain.submission import License +from .. import models, get_licenses, current_session +from .util import in_memory_db + + +class TestGetLicenses(TestCase): + """Test :func:`.get_licenses`.""" + + def test_get_all_active_licenses(self): + """Return a :class:`.domain.License` for each active license.""" + # mock_util.json_factory.return_value = SQLiteJSON + + with in_memory_db(): + session = current_session() + session.add(models.License( + name="http://arxiv.org/licenses/assumed-1991-2003", + sequence=9, + label="Assumed arXiv.org perpetual, non-exclusive license to", + active=0 + )) + session.add(models.License( + name="http://creativecommons.org/licenses/publicdomain/", + sequence=4, + label="Creative Commons Public Domain Declaration", + active=1 + )) + session.commit() + licenses = get_licenses() + + self.assertEqual(len(licenses), 1, + "Only the active license should be returned.") + self.assertIsInstance(licenses[0], License, + "Should return License instances.") + self.assertEqual(licenses[0].uri, + "http://creativecommons.org/licenses/publicdomain/", + "Should use name column to populate License.uri") + self.assertEqual(licenses[0].name, + "Creative Commons Public Domain Declaration", + "Should use label column to populate License.name") diff --git a/core/arxiv/submission/services/classic/tests/test_get_submission.py b/core/arxiv/submission/services/classic/tests/test_get_submission.py new file mode 100644 index 0000000..29242e4 --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/test_get_submission.py @@ -0,0 +1,248 @@ +"""Tests for retrieving submissions.""" + +from unittest import TestCase, mock +from 
datetime import datetime +from pytz import UTC +from flask import Flask + +from ....domain.agent import User, System +from ....domain.submission import License, Submission, Author +from ....domain.event import CreateSubmission, \ + FinalizeSubmission, SetPrimaryClassification, AddSecondaryClassification, \ + SetLicense, SetPrimaryClassification, ConfirmPolicy, \ + ConfirmContactInformation, SetTitle, SetAbstract, SetDOI, \ + SetMSCClassification, SetACMClassification, SetJournalReference, \ + SetComments, SetAuthors, Announce, ConfirmAuthorship, ConfirmPolicy, \ + SetUploadPackage +from .. import init_app, create_all, drop_all, models, DBEvent, \ + get_submission, get_user_submissions_fast, current_session, get_licenses, \ + exceptions, store_event, transaction + +from .util import in_memory_db + + +class TestGetSubmission(TestCase): + """Test :func:`.classic.get_submission`.""" + + def test_get_submission_that_does_not_exist(self): + """Test that an exception is raised when submission doesn't exist.""" + with in_memory_db(): + with self.assertRaises(exceptions.NoSuchSubmission): + get_submission(1) + + def test_get_submission_with_publish(self): + """Test that publication state is reflected in submission data.""" + user = User(12345, 'joe@joe.joe', + endorsements=['physics.soc-ph', 'cs.DL']) + + events = [ + CreateSubmission(creator=user), + SetTitle(creator=user, title='Foo title'), + SetAbstract(creator=user, abstract='Indeed' * 10), + SetAuthors(creator=user, authors=[ + Author(order=0, forename='Joe', surname='Bloggs', + email='joe@blo.ggs'), + Author(order=1, forename='Jane', surname='Doe', + email='j@doe.com'), + ]), + SetLicense(creator=user, license_uri='http://foo.org/1.0/', + license_name='Foo zero 1.0'), + SetPrimaryClassification(creator=user, category='cs.DL'), + ConfirmPolicy(creator=user), + SetUploadPackage(creator=user, identifier='12345'), + ConfirmContactInformation(creator=user), + FinalizeSubmission(creator=user) + ] + + with in_memory_db(): + 
# User creates and finalizes submission. + before = None + for i, event in enumerate(list(events)): + event.created = datetime.now(UTC) + after = event.apply(before) + event, after = store_event(event, before, after) + events[i] = event + before = after + submission = after + + ident = submission.submission_id + + session = current_session() + # Moderation happens, things change outside the event model. + db_submission = session.query(models.Submission).get(ident) + + # Announced! + db_submission.status = db_submission.ANNOUNCED + db_document = models.Document(paper_id='1901.00123') + db_submission.document = db_document + session.add(db_submission) + session.add(db_document) + session.commit() + + # Now get the submission. + submission_loaded, _ = get_submission(ident) + + self.assertEqual(submission.metadata.title, + submission_loaded.metadata.title, + "Event-derived metadata should be preserved.") + self.assertEqual(submission_loaded.arxiv_id, "1901.00123", + "arXiv paper ID should be set") + self.assertEqual(submission_loaded.status, Submission.ANNOUNCED, + "Submission status should reflect publish action") + + def test_get_submission_with_hold_and_reclass(self): + """Test changes made externally are reflected in submission data.""" + user = User(12345, 'joe@joe.joe', + endorsements=['physics.soc-ph', 'cs.DL']) + events = [ + CreateSubmission(creator=user), + SetTitle(creator=user, title='Foo title'), + SetAbstract(creator=user, abstract='Indeed' * 20), + SetAuthors(creator=user, authors=[ + Author(order=0, forename='Joe', surname='Bloggs', + email='joe@blo.ggs'), + Author(order=1, forename='Jane', surname='Doe', + email='j@doe.com'), + ]), + SetLicense(creator=user, license_uri='http://foo.org/1.0/', + license_name='Foo zero 1.0'), + SetPrimaryClassification(creator=user, category='cs.DL'), + ConfirmPolicy(creator=user), + SetUploadPackage(creator=user, identifier='12345'), + ConfirmContactInformation(creator=user), + FinalizeSubmission(creator=user) + ] + + 
with in_memory_db(): + # User creates and finalizes submission. + with transaction(): + before = None + for i, event in enumerate(list(events)): + event.created = datetime.now(UTC) + after = event.apply(before) + event, after = store_event(event, before, after) + events[i] = event + before = after + submission = after + ident = submission.submission_id + + session = current_session() + # Moderation happens, things change outside the event model. + db_submission = session.query(models.Submission).get(ident) + + # Reclassification! + session.delete(db_submission.primary_classification) + session.add(models.SubmissionCategory( + submission_id=ident, category='cs.IR', is_primary=1 + )) + + # On hold! + db_submission.status = db_submission.ON_HOLD + session.add(db_submission) + session.commit() + + # Now get the submission. + submission_loaded, _ = get_submission(ident) + + self.assertEqual(submission.metadata.title, + submission_loaded.metadata.title, + "Event-derived metadata should be preserved.") + self.assertEqual(submission_loaded.primary_classification.category, + "cs.IR", + "Primary classification should reflect the" + " reclassification that occurred outside the purview" + " of the event model.") + self.assertEqual(submission_loaded.status, Submission.SUBMITTED, + "Submission status should still be submitted.") + self.assertTrue(submission_loaded.is_on_hold, + "Hold status should reflect hold action performed" + " outside the purview of the event model.") + + def test_get_submission_list(self): + """Test that the set of submissions for a user can be retrieved.""" + user = User(42, 'adent@example.org', + endorsements=['astro-ph.GA', 'astro-ph.EP']) + events1 = [ + # first submission + CreateSubmission(creator=user), + SetTitle(creator=user, title='Foo title'), + SetAbstract(creator=user, abstract='Indeed' * 20), + SetAuthors(creator=user, authors=[ + Author(order=0, forename='Arthur', surname='Dent', + email='adent@example.org'), + Author(order=1, 
forename='Ford', surname='Prefect', + email='fprefect@example.org'), + ]), + SetLicense(creator=user, license_uri='http://creativecommons.org/publicdomain/zero/1.0/', + license_name='Foo zero 1.0'), + SetPrimaryClassification(creator=user, category='astro-ph.GA'), + ConfirmPolicy(creator=user), + SetUploadPackage(creator=user, identifier='1'), + ConfirmContactInformation(creator=user), + FinalizeSubmission(creator=user) + ] + events2 = [ + # second submission + CreateSubmission(creator=user), + SetTitle(creator=user, title='Bar title'), + SetAbstract(creator=user, abstract='Indubitably' * 20), + SetAuthors(creator=user, authors=[ + Author(order=0, forename='Jane', surname='Doe', + email='jadoe@example.com'), + Author(order=1, forename='John', surname='Doe', + email='jodoe@example.com'), + ]), + SetLicense(creator=user, license_uri='http://creativecommons.org/publicdomain/zero/1.0/', + license_name='Foo zero 1.0'), + SetPrimaryClassification(creator=user, category='astro-ph.GA'), + ConfirmPolicy(creator=user), + SetUploadPackage(creator=user, identifier='1'), + ConfirmContactInformation(creator=user), + FinalizeSubmission(creator=user) + ] + + with in_memory_db(): + # User creates and finalizes submission. + with transaction(): + before = None + for i, event in enumerate(list(events1)): + event.created = datetime.now(UTC) + after = event.apply(before) + event, after = store_event(event, before, after) + events1[i] = event + before = after + submission1 = after + ident1 = submission1.submission_id + + before = None + for i, event in enumerate(list(events2)): + event.created = datetime.now(UTC) + after = event.apply(before) + event, after = store_event(event, before, after) + events2[i] = event + before = after + submission2 = after + ident2 = submission2.submission_id + + classic_sub = models.Submission( + type='new', + submitter_id=42) + session = current_session() + session.add(classic_sub) + + # Now get the submissions for this user. 
+ submissions = get_user_submissions_fast(42) + submission_loaded1, _ = get_submission(ident1) + submission_loaded2, _ = get_submission(ident2) + + self.assertEqual(submission1.metadata.title, + submission_loaded1.metadata.title, + "Event-derived metadata for submission 1 should be preserved.") + self.assertEqual(submission2.metadata.title, + submission_loaded2.metadata.title, + "Event-derived metadata for submission 2 should be preserved.") + + self.assertEqual(len(submissions), + 2, + "There should be exactly two NG submissions.") + diff --git a/core/arxiv/submission/services/classic/tests/test_store_annotations.py b/core/arxiv/submission/services/classic/tests/test_store_annotations.py new file mode 100644 index 0000000..ed0dfd3 --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/test_store_annotations.py @@ -0,0 +1 @@ +"""Test persistence of annotations in the classic database.""" diff --git a/core/arxiv/submission/services/classic/tests/test_store_event.py b/core/arxiv/submission/services/classic/tests/test_store_event.py new file mode 100644 index 0000000..3fb655b --- /dev/null +++ b/core/arxiv/submission/services/classic/tests/test_store_event.py @@ -0,0 +1,318 @@ +"""Tests for storing events.""" + +from unittest import TestCase, mock +from datetime import datetime +from pytz import UTC +from flask import Flask + +from ....domain.agent import User, System +from ....domain.submission import License, Submission, Author +from ....domain.event import CreateSubmission, \ + FinalizeSubmission, SetPrimaryClassification, AddSecondaryClassification, \ + SetLicense, ConfirmPolicy, ConfirmContactInformation, SetTitle, \ + SetAbstract, SetDOI, SetMSCClassification, SetACMClassification, \ + SetJournalReference, SetComments, SetAuthors, Announce, \ + ConfirmAuthorship, SetUploadPackage +from .. 
class TestStoreEvent(TestCase):
    """Tests for :func:`.store_event`."""

    def setUp(self):
        """Instantiate a user."""
        self.user = User(12345, 'joe@joe.joe',
                         endorsements=['physics.soc-ph', 'cs.DL'])

    @staticmethod
    def _metadata():
        """Return a fresh copy of the metadata fixture shared by the tests."""
        return {
            'title': 'foo title',
            'abstract': 'very abstract' * 20,
            'comments': 'indeed',
            'msc_class': 'foo msc',
            'acm_class': 'F.2.2; I.2.7',
            'doi': '10.1000/182',
            'journal_ref': 'Nature 1991 2: 1',
            'authors': [Author(order=0, forename='Joe', surname='Bloggs')]
        }

    @staticmethod
    def _store_all(events):
        """
        Apply and store ``events`` in order within a single transaction.

        Each stored event replaces its original in ``events`` (the list is
        updated in place). Returns the submission state after the last event,
        or ``None`` if ``events`` is empty.
        """
        after = None
        with transaction():
            before = None
            for i, event in enumerate(list(events)):
                event.created = datetime.now(UTC)
                after = event.apply(before)
                # store_event returns an (event, submission) pair; both parts
                # must be unpacked so that ``events`` holds events, not tuples.
                event, after = store_event(event, before, after)
                events[i] = event
                before = after
        return after

    def _finalizing_events(self, metadata):
        """Build the full event sequence that ends in a finalized submission."""
        return [
            CreateSubmission(creator=self.user),
            ConfirmContactInformation(creator=self.user),
            ConfirmAuthorship(creator=self.user, submitter_is_author=True),
            ConfirmContactInformation(creator=self.user),
            ConfirmPolicy(creator=self.user),
            SetTitle(creator=self.user, title=metadata['title']),
            SetAuthors(creator=self.user, authors=[
                Author(order=0, forename='Joe', surname='Bloggs',
                       email='joe@blo.ggs'),
                Author(order=1, forename='Jane', surname='Doe',
                       email='j@doe.com'),
            ]),
            SetAbstract(creator=self.user, abstract=metadata['abstract']),
            SetComments(creator=self.user, comments=metadata['comments']),
            SetMSCClassification(creator=self.user,
                                 msc_class=metadata['msc_class']),
            SetACMClassification(creator=self.user,
                                 acm_class=metadata['acm_class']),
            SetJournalReference(creator=self.user,
                                journal_ref=metadata['journal_ref']),
            SetDOI(creator=self.user, doi=metadata['doi']),
            SetLicense(creator=self.user,
                       license_uri='http://foo.org/1.0/',
                       license_name='Foo zero 1.0'),
            SetUploadPackage(creator=self.user, identifier='12345'),
            SetPrimaryClassification(creator=self.user,
                                     category='physics.soc-ph'),
            FinalizeSubmission(creator=self.user)
        ]

    def test_store_creation(self):
        """Store a :class:`CreateSubmission`."""
        with in_memory_db():
            session = current_session()
            before = None
            event = CreateSubmission(creator=self.user)
            event.created = datetime.now(UTC)
            after = event.apply(before)

            event, after = store_event(event, before, after)

            db_sb = session.query(models.Submission).get(event.submission_id)

            # Make sure that we get the right submission ID.
            self.assertIsNotNone(event.submission_id)
            self.assertEqual(event.submission_id, after.submission_id)
            self.assertEqual(event.submission_id, db_sb.submission_id)

            self.assertEqual(db_sb.status, models.Submission.NOT_SUBMITTED)
            self.assertEqual(db_sb.type, models.Submission.NEW_SUBMISSION)
            self.assertEqual(db_sb.version, 1)

    def test_store_events_with_metadata(self):
        """Store events and attendant submission with metadata."""
        metadata = self._metadata()
        with in_memory_db():
            events = [
                CreateSubmission(creator=self.user),
                SetTitle(creator=self.user, title=metadata['title']),
                SetAbstract(creator=self.user, abstract=metadata['abstract']),
                SetComments(creator=self.user, comments=metadata['comments']),
                SetMSCClassification(creator=self.user,
                                     msc_class=metadata['msc_class']),
                SetACMClassification(creator=self.user,
                                     acm_class=metadata['acm_class']),
                SetJournalReference(creator=self.user,
                                    journal_ref=metadata['journal_ref']),
                SetDOI(creator=self.user, doi=metadata['doi']),
            ]
            after = self._store_all(events)

            session = current_session()
            db_submission = session.query(models.Submission)\
                .get(after.submission_id)
            db_events = session.query(DBEvent).all()

            for key, value in metadata.items():
                # Authors are stored as a display string; checked separately.
                if key == 'authors':
                    continue
                self.assertEqual(getattr(db_submission, key), value,
                                 f"The value of {key} should be {value}")
            self.assertEqual(db_submission.authors,
                             after.metadata.authors_display,
                             "The canonical author string should be used to"
                             " update the submission in the database.")

            self.assertEqual(len(db_events), 8,
                             "Eight events should be stored")
            for db_event in db_events:
                self.assertEqual(db_event.submission_id, after.submission_id,
                                 "The submission id should be set")

    def test_store_events_with_finalized_submission(self):
        """Store events and a finalized submission."""
        metadata = self._metadata()
        with in_memory_db():
            events = self._finalizing_events(metadata)
            after = self._store_all(events)

            session = current_session()
            db_submission = session.query(models.Submission) \
                .get(after.submission_id)
            db_events = session.query(DBEvent).all()

            self.assertEqual(db_submission.submission_id, after.submission_id,
                             "The submission should be updated with the PK id")
            self.assertEqual(db_submission.status, models.Submission.SUBMITTED,
                             "Submission should be in submitted state.")
            self.assertEqual(len(db_events), len(events),
                             "%i events should be stored" % len(events))
            for db_event in db_events:
                self.assertEqual(db_event.submission_id, after.submission_id,
                                 "The submission id should be set")

    def test_store_doi_jref_with_publication(self):
        """:class:`SetDOI` or :class:`SetJournalReference` after pub."""
        metadata = self._metadata()

        with in_memory_db():
            events = self._finalizing_events(metadata)
            after = self._store_all(events)
            before = after

            session = current_session()
            # Announced!
            paper_id = '1901.00123'
            db_submission = session.query(models.Submission) \
                .get(after.submission_id)
            db_submission.status = db_submission.ANNOUNCED
            db_document = models.Document(paper_id=paper_id)
            db_submission.doc_paper_id = paper_id
            db_submission.document = db_document
            session.add(db_submission)
            session.add(db_document)
            session.commit()

            # This would normally happen during a load.
            pub = Announce(creator=System(__name__), arxiv_id=paper_id,
                           committed=True)
            before = pub.apply(before)

            # Now set DOI + journal ref
            doi = '10.1000/182'
            journal_ref = 'foo journal 1994'
            e3 = SetDOI(creator=self.user, doi=doi,
                        submission_id=after.submission_id,
                        created=datetime.now(UTC))
            after = e3.apply(before)
            with transaction():
                store_event(e3, before, after)

            e4 = SetJournalReference(creator=self.user,
                                     journal_ref=journal_ref,
                                     submission_id=after.submission_id,
                                     created=datetime.now(UTC))
            before = after
            after = e4.apply(before)
            with transaction():
                store_event(e4, before, after)

            session = current_session()
            # What happened.
            db_submission = session.query(models.Submission) \
                .filter(models.Submission.doc_paper_id == paper_id) \
                .order_by(models.Submission.submission_id.desc())
            self.assertEqual(db_submission.count(), 2,
                             "Creates a second row for the JREF")
            db_jref = db_submission.first()
            self.assertTrue(db_jref.is_jref())
            self.assertEqual(db_jref.doi, doi)
            self.assertEqual(db_jref.journal_ref, journal_ref)

    def test_store_events_with_classification(self):
        """Store events including classification."""
        events = [
            CreateSubmission(creator=self.user),
            SetPrimaryClassification(creator=self.user,
                                     category='physics.soc-ph'),
            AddSecondaryClassification(creator=self.user,
                                       category='physics.acc-ph'),
        ]

        with in_memory_db():
            after = self._store_all(events)

            session = current_session()
            db_submission = session.query(models.Submission)\
                .get(after.submission_id)
            db_events = session.query(DBEvent).all()

            self.assertEqual(db_submission.submission_id, after.submission_id,
                             "The submission should be updated with the PK id")
            self.assertEqual(len(db_events), 3,
                             "Three events should be stored")
            for db_event in db_events:
                self.assertEqual(db_event.submission_id, after.submission_id,
                                 "The submission id should be set")
            self.assertEqual(len(db_submission.categories), 2,
                             "Two category relations should be set")
            self.assertEqual(db_submission.primary_classification.category,
                             after.primary_classification.category,
                             "Primary classification should be set.")
class TestSaveProposal(TestCase):
    """An :class:`AddProposal` event is stored."""

    def setUp(self):
        """Instantiate a user."""
        self.user = User(12345, 'joe@joe.joe',
                         endorsements=['physics.soc-ph', 'cs.DL'])

    def _create_submission(self):
        """Store a :class:`CreateSubmission`; return the stored submission."""
        creation = CreateSubmission(creator=self.user,
                                    created=datetime.now(UTC))
        initial = creation.apply(None)
        _, submission = store_event(creation, None, initial)
        return submission

    def _add_proposal(self, submission, event_type, event_data):
        """Store an :class:`AddProposal` for ``submission`` in a transaction."""
        proposal = AddProposal(
            creator=self.user,
            created=datetime.now(UTC),
            proposed_event_type=event_type,
            proposed_event_data=event_data,
            comment='foo'
        )
        updated = proposal.apply(submission)
        with transaction():
            stored = store_event(proposal, submission, updated)
        return stored

    def _stored_proposals(self, event, after):
        """Check the submission ID round trip; return category proposals."""
        session = current_session()
        row = session.query(models.Submission).get(event.submission_id)

        # Make sure that we get the right submission ID.
        self.assertIsNotNone(event.submission_id)
        self.assertEqual(event.submission_id, after.submission_id)
        self.assertEqual(event.submission_id, row.submission_id)

        return session.query(models.CategoryProposal).all()

    def _check_category_proposal(self, proposals, event, after, is_primary):
        """Assert that exactly one unresolved ``cs.DL`` proposal was stored."""
        self.assertEqual(len(proposals), 1)
        stored = proposals[0]
        self.assertEqual(stored.submission_id, after.submission_id)
        self.assertEqual(stored.category, 'cs.DL')
        self.assertEqual(stored.is_primary, is_primary)
        self.assertEqual(stored.updated.replace(tzinfo=UTC),
                         event.created)
        self.assertEqual(stored.proposal_status,
                         models.CategoryProposal.UNRESOLVED)

        self.assertEqual(stored.proposal_comment.logtext,
                         event.comment)

    def test_save_reclassification_proposal(self):
        """A submission has a new reclassification proposal."""
        with in_memory_db():
            submission = self._create_submission()
            event, after = self._add_proposal(
                submission,
                SetPrimaryClassification,
                {'category': taxonomy.Category('cs.DL')},
            )
            proposals = self._stored_proposals(event, after)
            self._check_category_proposal(proposals, event, after, 1)

    def test_save_secondary_proposal(self):
        """A submission has a new cross-list proposal."""
        with in_memory_db():
            submission = self._create_submission()
            event, after = self._add_proposal(
                submission,
                AddSecondaryClassification,
                {'category': taxonomy.Category('cs.DL')},
            )
            proposals = self._stored_proposals(event, after)
            self._check_category_proposal(proposals, event, after, 0)

    def test_save_title_proposal(self):
        """A submission has a new SetTitle proposal."""
        with in_memory_db():
            submission = self._create_submission()
            event, after = self._add_proposal(
                submission,
                SetTitle,
                {'title': 'the foo title'},
            )
            proposals = self._stored_proposals(event, after)
            # Only classification proposals are persisted as category
            # proposals, so a title proposal leaves that table empty.
            self.assertEqual(len(proposals), 0)
@contextmanager
def in_memory_db(app=None):
    """
    Provide an in-memory sqlite database for testing purposes.

    Creates all tables on entry and drops them on exit, whether or not the
    managed block raised. A throwaway Flask app is created when ``app`` is
    not given.
    """
    # NOTE(review): the original layout is not recoverable from the collapsed
    # diff; the config overrides are assumed to apply to any app, not only to
    # the default one -- confirm against the upstream file.
    flask_app = Flask('foo') if app is None else app
    flask_app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite://'
    flask_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
    init_app(flask_app)
    with flask_app.app_context():
        create_all()
        try:
            yield
        finally:
            # Exceptions from the managed block propagate unchanged; the
            # schema is torn down either way.
            drop_all()
class ClassicSQLAlchemy(SQLAlchemy):
    """SQLAlchemy integration for the classic database."""

    def init_app(self, app: Flask) -> None:
        """Fill in default database settings, then initialize ``app``."""
        # Fall back to CLASSIC_DATABASE_URI (or in-memory SQLite) only when
        # SQLALCHEMY_DATABASE_URI has not been configured explicitly.
        fallback_uri = app.config.get('CLASSIC_DATABASE_URI', 'sqlite://')
        app.config.setdefault('SQLALCHEMY_DATABASE_URI', fallback_uri)
        app.config.setdefault('SQLALCHEMY_TRACK_MODIFICATIONS', False)
        super().init_app(app)

    def apply_pool_defaults(self, app, options):
        """Set options for create_engine()."""
        super().apply_pool_defaults(app, options)
        database_uri = app.config['SQLALCHEMY_DATABASE_URI']
        if database_uri.startswith('mysql'):
            # Use the project serializer for JSON columns on MySQL.
            options['json_serializer'] = serializer.dumps
            options['json_deserializer'] = serializer.loads


db: SQLAlchemy = ClassicSQLAlchemy()


logger = logging.getLogger(__name__)


class SQLiteJSON(types.TypeDecorator):
    """A SQLite-friendly JSON data type, stored as serialized text."""

    impl = types.TEXT

    def process_bind_param(self, value: Optional[dict], dialect: str) -> str:
        """Serialize a dict to JSON; ``None`` is passed through."""
        if value is None:
            return value
        return serializer.dumps(value)

    def process_result_value(self, value: str, dialect: str) -> Optional[dict]:
        """Deserialize JSON content to a dict; ``None`` is passed through."""
        if value is None:
            return value
        return serializer.loads(value)


# SQLite does not support JSON, so we extend JSON to use our custom data type
# as a variant for the 'sqlite' dialect.
FriendlyJSON = types.JSON().with_variant(SQLiteJSON, 'sqlite')


def current_engine() -> Engine:
    """Get/create :class:`.Engine` for this context."""
    return db.engine


def current_session() -> Session:
    """Get/create :class:`.Session` for this context."""
    return db.session()


@contextmanager
def transaction() -> Generator:
    """
    Context manager for database transaction.

    Yields the current session. On a clean exit the session is committed if
    it holds pending changes; on any exception it is rolled back.

    Raises
    ------
    :class:`ClassicBaseException`, :class:`InvalidEvent`
        Re-raised unchanged after rollback.
    :class:`TransactionFailed`
        Raised in place of any other exception, chained to the original.

    """
    session = current_session()
    logger.debug('transaction with session %s', id(session))
    try:
        yield session
        # Only commit if there are un-flushed changes. The caller may commit
        # explicitly, e.g. to do exception handling.
        has_pending = session.dirty or session.deleted or session.new
        if has_pending:
            session.commit()
            logger.debug('committed!')
    except ClassicBaseException as exc:
        logger.debug('Command failed, rolling back: %s', str(exc))
        session.rollback()
        raise   # Propagate exceptions raised from this module.
    except InvalidEvent:
        session.rollback()
        raise
    except Exception as exc:
        logger.debug('Command failed, rolling back: %s', str(exc))
        session.rollback()
        raise TransactionFailed('Failed to execute transaction') from exc
class Flag(NamedTuple):
    """General-purpose QA flag."""

    key: str
    # Flag values in classifier responses may be numeric (including
    # fractional values such as a stop-word ratio), strings, or nested
    # mappings (e.g. per-charset counts).
    value: Union[int, float, str, dict]


class Suggestion(NamedTuple):
    """A category suggested by the classifier."""

    category: Category
    # Probability in [0, 1], derived from the classifier's log odds.
    probability: float


class Counts(NamedTuple):
    """Various counts of paper content."""

    chars: int
    pages: int
    stops: int
    words: int


class Classifier(service.HTTPIntegration):
    """Represents an interface to the classifier service."""

    VERSION = '0.0'
    SERVICE = 'classic'

    ClassifierResponse = Tuple[List[Suggestion], List[Flag], Optional[Counts]]

    class Meta:
        """Configuration for :class:`Classifier`."""

        service_name = "classifier"

    def __init__(self, endpoint: str, verify: bool = True, **params: Any):
        """Initialize the integration for the service at ``endpoint``."""
        super(Classifier, self).__init__(endpoint, verify=verify, **params)

    def is_available(self, **kwargs: Any) -> bool:
        """
        Check our connection to the classifier service.

        Sends a tiny classification request; any failure is logged and
        reported as unavailability rather than raised.
        """
        timeout: float = kwargs.get('timeout', 0.2)
        try:
            self.classify(b'ruok?', timeout=timeout)
        except Exception as e:
            logger.error('Encountered error calling classifier: %s', e)
            return False
        return True

    @classmethod
    def probability(cls, logodds: float) -> float:
        """
        Convert log odds to a probability.

        Parameters
        ----------
        logodds : float
            Natural logarithm of the odds.

        Returns
        -------
        float
            The corresponding probability, in the range [0, 1].

        """
        try:
            odds = exp(logodds)
        except OverflowError:
            # exp() overflows for very large log odds (above roughly 709);
            # the probability is indistinguishable from 1 at that point.
            return 1.0
        return odds/(1 + odds)

    def _counts(self, data: dict) -> Optional[Counts]:
        """Parse counts from the response data."""
        counts: Optional[Counts] = None
        if 'counts' in data:
            counts = Counts(**data['counts'])
        return counts

    def _flags(self, data: dict) -> List[Flag]:
        """Parse flags from the response data."""
        return [
            Flag(key, value) for key, value in data.get('flags', {}).items()
        ]

    def _suggestions(self, data: dict) -> List[Suggestion]:
        """Parse classification suggestions from the response data."""
        return [Suggestion(category=Category(datum['category']),
                           probability=self.probability(datum['logodds']))
                for datum in data['classifier']]

    def classify(self, content: bytes, timeout: float = 1.) \
            -> ClassifierResponse:
        """
        Make a classification request to the classifier service.

        Parameters
        ----------
        content : bytes
            Raw text content from an e-print.
        timeout : float
            Passed through to the underlying HTTP request. Default is 1.

        Returns
        -------
        list
            A list of classifications.
        list
            A list of QA flags.
        :class:`Counts` or None
            Feature counts, if provided.

        """
        data, _, _ = self.json('post', '', data=content, timeout=timeout)
        return self._suggestions(data), self._flags(data), self._counts(data)
{"disk mass": 33}, {"stars": 25}]}, {"category": "astro-ph.EP", "logodds": 0.8, "topwords": [{"disk mass": 33}, {"single stars": 30}, {"alma": 37}, {"binaries": 34}, {"taurus": 38}]}, {"category": "astro-ph.HE", "logodds": 0.29}, {"category": "astro-ph.IM", "logodds": 0.27}], "counts": {"chars": 125436, "pages": 30, "stops": 3774, "words": 34211}, "flags": {"%stop": 0.11, "linenos": 5}} diff --git a/core/arxiv/submission/services/classifier/tests/data/sampleFailedCyrillic.json b/core/arxiv/submission/services/classifier/tests/data/sampleFailedCyrillic.json new file mode 100644 index 0000000..7b98aa7 --- /dev/null +++ b/core/arxiv/submission/services/classifier/tests/data/sampleFailedCyrillic.json @@ -0,0 +1,21 @@ +{ + "classifier":[ + + ], + "counts":{ + "chars":50475, + "pages":8, + "stops":9, + "words":4799 + }, + "flags":{ + "%stop":0.0, + "charset":{ + "cyrillic":2458 + }, + "language":{ + "ru":732 + }, + "stops":9 + } +} diff --git a/core/arxiv/submission/services/classifier/tests/data/sampleResponse.json b/core/arxiv/submission/services/classifier/tests/data/sampleResponse.json new file mode 100644 index 0000000..8d7c6b9 --- /dev/null +++ b/core/arxiv/submission/services/classifier/tests/data/sampleResponse.json @@ -0,0 +1,74 @@ +{ + "classifier": [ + { + "category": "physics.comp-ph", + "logodds": -0.11, + "topwords": [ + { + "processors": 13 + }, + { + "fft": 13 + }, + { + "decyk": 4 + }, + { + "fast fourier transform": 7 + }, + { + "parallel": 10 + } + ] + }, + { + "category": "cs.MS", + "logodds": -0.14, + "topwords": [ + { + "fft": 13 + }, + { + "processors": 13 + }, + { + "fast fourier transform": 7 + }, + { + "parallel": 10 + }, + { + "processor": 7 + } + ] + }, + { + "category": "math.NA", + "logodds": -0.16, + "topwords": [ + { + "fft": 13 + }, + { + "fast fourier transform": 7 + }, + { + "algorithm": 6 + }, + { + "ux": 4 + }, + { + "multiplications": 5 + } + ] + } + ], + "counts": { + "chars": 15107, + "pages": 12, + "stops": 804, + "words": 2860 + 
def _mock_post(status_code, payload=None):
    """
    Build a mock for ``Session.post`` that answers with ``status_code``.

    When ``payload`` is given, the mocked response's ``json()`` returns it.
    """
    response_attrs = {'status_code': status_code}
    if payload is not None:
        response_attrs['json'] = mock.MagicMock(return_value=payload)
    return mock.MagicMock(return_value=mock.MagicMock(**response_attrs))


def _load_sample(path):
    """Load one of the canned classifier responses from the data directory."""
    with open(path) as f:
        return json.load(f)


def _check_counts(case, counts, chars, pages, stops, words):
    """Assert each field of ``counts`` against the expected value."""
    case.assertEqual(counts.chars, chars)
    case.assertEqual(counts.pages, pages)
    case.assertEqual(counts.stops, stops)
    case.assertEqual(counts.words, words)


class TestClassifier(TestCase):
    """Tests for :class:`classifier.Classifier`."""

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_with_service_unavailable(self, mock_Session):
        """A 503 from the service surfaces as ``RequestFailed``."""
        mock_Session.return_value = mock.MagicMock(
            post=_mock_post(status.SERVICE_UNAVAILABLE)
        )
        with self.assertRaises(exceptions.RequestFailed):
            classifier.Classifier('http://foo:9000').classify(b'somecontent')

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_cannot_classify(self, mock_Session):
        """The response carries flags and counts but no suggestions."""
        payload = _load_sample(SAMPLE_FAILED_PATH)
        mock_Session.return_value = mock.MagicMock(
            post=_mock_post(status.OK, payload)
        )
        client = classifier.Classifier('http://foo:9000')
        suggestions, flags, counts = client.classify(b'foo')
        self.assertEqual(len(suggestions), 0, "There are no suggestions")
        self.assertEqual(len(flags), 4, "There are four flags")
        _check_counts(self, counts, 50475, 8, 9, 4799)

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_returns_suggestions(self, mock_Session):
        """The response carries three suggestions and no flags."""
        payload = _load_sample(SAMPLE_PATH)
        mock_Session.return_value = mock.MagicMock(
            post=_mock_post(status.OK, payload)
        )
        expected = {
            'physics.comp-ph': 0.47,
            'cs.MS': 0.47,
            'math.NA': 0.46
        }
        client = classifier.Classifier('http://foo:9000')
        suggestions, flags, counts = client.classify(b'foo')
        self.assertEqual(len(suggestions), 3, "There are three suggestions")
        for item in suggestions:
            self.assertEqual(round(item.probability, 2),
                             expected[item.category])
        self.assertEqual(len(flags), 0, "There are no flags")
        _check_counts(self, counts, 15107, 12, 804, 2860)

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_withlinenos(self, mock_Session):
        """The response carries suggestions plus %stop and linenos flags."""
        payload = _load_sample(LINENOS_PATH)
        mock_Session.return_value = mock.MagicMock(
            post=_mock_post(status.OK, payload)
        )
        expected = {
            'astro-ph.SR': 0.77,
            'astro-ph.GA': 0.7,
            'astro-ph.EP': 0.69,
            'astro-ph.HE': 0.57,
            'astro-ph.IM': 0.57
        }

        client = classifier.Classifier('http://foo:9000')
        suggestions, flags, counts = client.classify(b'foo')
        self.assertEqual(len(suggestions), 5, "There are five suggestions")
        for item in suggestions:
            wanted = expected[item.category]
            self.assertEqual(
                round(item.probability, 2),
                wanted,
                "Expected probability of %s for %s" % (wanted, item.category)
            )
        self.assertEqual(len(flags), 2, "There are two flags")
        self.assertIn("%stop", [flag.key for flag in flags])
        self.assertIn("linenos", [flag.key for flag in flags])
        _check_counts(self, counts, 125436, 30, 3774, 34211)


class TestClassifierModule(TestCase):
    """Tests for :mod:`classifier`, calling ``classify`` on the class."""

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_unavailable(self, mock_Session):
        """A 503 from the service surfaces as ``RequestFailed``."""
        mock_post = _mock_post(status.SERVICE_UNAVAILABLE)
        mock_Session.return_value = mock.MagicMock(post=mock_post)
        with self.assertRaises(exceptions.RequestFailed):
            classifier.Classifier.classify(b'somecontent')
        self.assertEqual(mock_post.call_args[0][0],
                         'http://foohost:1234/ctxt')

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_cannot_classify(self, mock_Session):
        """The response carries flags and counts but no suggestions."""
        mock_post = _mock_post(status.OK, _load_sample(SAMPLE_FAILED_PATH))
        mock_Session.return_value = mock.MagicMock(post=mock_post)
        suggestions, flags, counts = classifier.Classifier.classify(b'foo')
        self.assertEqual(len(suggestions), 0, "There are no suggestions")
        self.assertEqual(len(flags), 4, "There are four flags")
        _check_counts(self, counts, 50475, 8, 9, 4799)
        self.assertEqual(mock_post.call_args[0][0],
                         'http://foohost:1234/ctxt')

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_classifier_returns_suggestions(self, mock_Session):
        """The response carries three suggestions and no flags."""
        mock_post = _mock_post(status.OK, _load_sample(SAMPLE_PATH))
        mock_Session.return_value = mock.MagicMock(post=mock_post)
        expected = {
            'physics.comp-ph': 0.47,
            'cs.MS': 0.47,
            'math.NA': 0.46
        }
        suggestions, flags, counts = classifier.Classifier.classify(b'foo')
        self.assertEqual(len(suggestions), 3, "There are three suggestions")
        for item in suggestions:
            self.assertEqual(round(item.probability, 2),
                             expected[item.category])
        self.assertEqual(len(flags), 0, "There are no flags")
        _check_counts(self, counts, 15107, 12, 804, 2860)
        self.assertEqual(mock_post.call_args[0][0],
                         'http://foohost:1234/ctxt')
class CompilationFailed(RuntimeError):
    """The compilation service failed to compile the source package."""


class Compiler(service.HTTPIntegration):
    """Encapsulates a connection with the compiler service."""

    VERSION = "0.1"
    """Version of the compiler service with which we are integrating."""

    NAME = "arxiv-compiler"
    """Name of the compiler service with which we are integrating."""

    class Meta:
        """Configuration for :class:`Compiler`."""

        service_name = "compiler"

    def is_available(self, **kwargs: Any) -> bool:
        """
        Check our connection to the compiler service.

        Parameters
        ----------
        timeout : float
            Optional keyword argument; seconds to wait for the status
            request. Defaults to ``0.2``.

        Returns
        -------
        bool
            ``False`` if the status request raised anything at all,
            otherwise ``True``.

        """
        timeout: float = kwargs.get('timeout', 0.2)
        try:
            self.get_service_status(timeout=timeout)
        except Exception as e:
            # Deliberately broad: any failure means "not available".
            logger.error('Encountered error calling compiler: %s', e)
            return False
        return True

    def _parse_status_response(self, data: dict) -> Compilation:
        """Build a :class:`Compilation` from a status response body."""
        return Compilation(
            source_id=data['source_id'],
            checksum=data['checksum'],
            output_format=Compilation.Format(data['output_format']),
            status=Compilation.Status(data['status']),
            # ``reason``, ``description`` and ``size_bytes`` are optional in
            # the response; fall back to None/None/0 when they are absent.
            reason=Compilation.Reason(data.get('reason', None)),
            description=data.get('description', None),
            size_bytes=data.get('size_bytes', 0)
        )

    def _parse_loc(self, headers: Mapping) -> str:
        """Return the path component of the ``Location`` header."""
        return urlparse(headers['Location']).path

    def get_service_status(self, timeout: float = 0.2) -> dict:
        """Get the status of the compiler service."""
        return self.json('get', 'status', timeout=timeout)[0]

    def compile(self, source_id: str, checksum: str, token: str,
                stamp_label: str, stamp_link: str,
                compiler: Optional[Compilation.SupportedCompiler] = None,
                output_format: Compilation.Format = PDF,
                force: bool = False) -> Compilation:
        """
        Request compilation for an upload workspace.

        Unless ``force`` is ``True``, the compiler service will only attempt
        to compile a source ID + checksum + format combo once. If there is
        already a compilation underway or complete for the parameters in this
        request, the service will redirect to the corresponding status URI.
        Hence the data returned by this function may be from the response to
        the initial POST request, or from the status endpoint after being
        redirected.

        Parameters
        ----------
        source_id : str
            Unique identifier for the upload workspace.
        checksum : str
            Checksum identifying the state of the upload workspace.
        token : str
            The original (encrypted) auth token on the request. Used to perform
            subrequests to the file management service.
        stamp_label : str
            Label to use in PS/PDF stamp/watermark. Form is
            'Identifier [Category Date]'
            Category and Date are optional. By default Date will be added
            by compiler.
        stamp_link : str
            Link (URI) to use in PS/PDF stamp/watermark.
        compiler : :class:`.Compilation.SupportedCompiler` or None
            Name of the preferred compiler.
        output_format : :class:`.Format`
            Defaults to :attr:`.Format.PDF`.
        force : bool
            If True, compilation will be forced even if it has been attempted
            with these parameters previously. Default is ``False``.

        Returns
        -------
        :class:`Compilation`
            The current state of the compilation.

        """
        logger.debug("Requesting compilation for %s @ %s: %s",
                     source_id, checksum, output_format)
        # NOTE(review): ``compiler`` is accepted but is not part of the
        # request payload -- confirm whether the service expects it.
        payload = {'source_id': source_id, 'checksum': checksum,
                   'stamp_label': stamp_label, 'stamp_link': stamp_link,
                   'format': output_format.value, 'force': force}
        endpoint = '/'
        expected_codes = [status.OK, status.ACCEPTED,
                          status.SEE_OTHER, status.FOUND]
        data, _, _ = self.json('post', endpoint, token, json=payload,
                               expected_code=expected_codes)
        return self._parse_status_response(data)

    def get_status(self, source_id: str, checksum: str, token: str,
                   output_format: Compilation.Format = PDF) -> Compilation:
        """
        Get the status of a compilation.

        Parameters
        ----------
        source_id : str
            Unique identifier for the upload workspace.
        checksum : str
            Checksum identifying the state of the upload workspace.
        token : str
            Auth token passed along with the request.
        output_format : :class:`.Format`
            Defaults to :attr:`.Format.PDF`.

        Returns
        -------
        :class:`Compilation`
            The current state of the compilation.

        """
        endpoint = f'/{source_id}/{checksum}/{output_format.value}'
        data, _, _ = self.json('get', endpoint, token)
        return self._parse_status_response(data)

    def compilation_is_complete(self, source_id: str, checksum: str,
                                token: str,
                                output_format: Compilation.Format = PDF) \
            -> bool:
        """
        Check whether compilation has completed successfully.

        Parameters
        ----------
        source_id : str
            Unique identifier for the upload workspace.
        checksum : str
            Checksum identifying the state of the upload workspace.
        token : str
            Auth token passed along with the request.
        output_format : :class:`.Format`
            Defaults to :attr:`.Format.PDF`, like the sibling methods.

        Returns
        -------
        bool
            ``True`` if the compilation succeeded; ``False`` for any status
            other than succeeded or failed.

        Raises
        ------
        :class:`CompilationFailed`
            Raised if the compilation is in a failed state.

        """
        stat = self.get_status(source_id, checksum, token, output_format)
        if stat.status is Compilation.Status.SUCCEEDED:
            return True
        if stat.status is Compilation.Status.FAILED:
            raise CompilationFailed('Compilation failed')
        return False

    def get_product(self, source_id: str, checksum: str, token: str,
                    output_format: Compilation.Format = PDF) \
            -> CompilationProduct:
        """
        Get the compilation product for an upload workspace, if it exists.

        Parameters
        ----------
        source_id : str
            Unique identifier for the upload workspace.
        checksum : str
            Checksum identifying the state of the upload workspace.
        token : str
            Auth token passed along with the request.
        output_format : :class:`.Format`
            Defaults to :attr:`.Format.PDF`.

        Returns
        -------
        :class:`CompilationProduct`
            The compilation product itself.

        """
        endpoint = f'/{source_id}/{checksum}/{output_format.value}/product'
        response = self.request('get', endpoint, token, stream=True)
        # The whole body is read into memory and re-exposed as a stream.
        return CompilationProduct(content_type=output_format.content_type,
                                  stream=io.BytesIO(response.content))

    def get_log(self, source_id: str, checksum: str, token: str,
                output_format: Compilation.Format = PDF) -> CompilationLog:
        """
        Get the compilation log for an upload workspace, if it exists.

        Parameters
        ----------
        source_id : str
            Unique identifier for the upload workspace.
        checksum : str
            Checksum identifying the state of the upload workspace.
        token : str
            Auth token passed along with the request.
        output_format : :class:`.Format`
            Defaults to :attr:`.Format.PDF`.

        Returns
        -------
        :class:`CompilationLog`
            The compilation log itself.

        """
        endpoint = f'/{source_id}/{checksum}/{output_format.value}/log'
        response = self.request('get', endpoint, token, stream=True)
        # The whole body is read into memory and re-exposed as a stream.
        return CompilationLog(stream=io.BytesIO(response.content))


def get_task_id(source_id: str, checksum: str,
                output_format: Compilation.Format) -> str:
    """Generate a key for a source ID/checksum/format combination."""
    return f"{source_id}/{checksum}/{output_format.value}"


def split_task_id(task_id: str) -> Tuple[str, str, Compilation.Format]:
    """
    Split a task ID from :func:`get_task_id` back into its components.

    The source ID is taken from the first ``/``-delimited field and the
    format from the last, so a checksum that itself contains ``/`` is
    preserved intact.

    Raises
    ------
    ValueError
        If ``task_id`` has fewer than three ``/``-delimited parts, or the
        format part is not a valid :class:`Compilation.Format` value.

    """
    source_id, remainder = task_id.split("/", 1)
    checksum, format_value = remainder.rsplit("/", 1)
    return source_id, checksum, Compilation.Format(format_value)


class Download(object):
    """Wrapper around response content."""

    def __init__(self, response: requests.Response) -> None:
        """Initialize with a :class:`requests.Response` object."""
        self._response = response

    def read(self, *args: Any, **kwargs: Any) -> bytes:
        """
        Read response content.

        All arguments are ignored: every call returns the complete
        response body, regardless of any size hint.
        """
        return self._response.content
def _status_payload(source_id, checksum, output_format, state):
    """Build the JSON body of a compilation status response."""
    return {
        'source_id': source_id,
        'checksum': checksum,
        'output_format': output_format.value,
        'status': state.value
    }


def _responding(status_code, payload, headers=None):
    """
    Build a mock HTTP method that answers with the given status and JSON.

    ``headers`` is only set on the mocked response when it is provided.
    """
    response_attrs = {
        'status_code': status_code,
        'json': mock.MagicMock(return_value=payload)
    }
    if headers is not None:
        response_attrs['headers'] = headers
    return mock.MagicMock(return_value=mock.MagicMock(**response_attrs))


class TestRequestCompilation(TestCase):
    """Tests for :mod:`compiler.compile` with mocked responses."""

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_compile(self, mock_Session):
        """Request compilation of an upload workspace."""
        Compilation = domain.compilation.Compilation
        source_id, checksum = 42, 'asdf1234='
        output_format = Compilation.Format.PDF
        location = f'http://asdf/{source_id}/{checksum}/{output_format.value}'
        in_progress = Compilation.Status.IN_PROGRESS
        mock_session = mock.MagicMock(
            post=_responding(
                status.ACCEPTED,
                _status_payload(source_id, checksum, output_format,
                                in_progress),
                headers={'Location': location}
            ),
            get=_responding(
                status.OK,
                _status_payload(source_id, checksum, output_format,
                                in_progress),
                headers={'Location': location}
            )
        )
        mock_Session.return_value = mock_session

        comp_status = compiler.Compiler.compile(source_id, checksum, 'footok',
                                                'theLabel', 'http://the.link')
        self.assertEqual(comp_status.source_id, source_id)
        self.assertEqual(comp_status.identifier,
                         f"{source_id}/{checksum}/{output_format.value}")
        self.assertEqual(comp_status.status, in_progress)
        self.assertEqual(mock_session.post.call_count, 1)

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_compile_redirects(self, mock_Session):
        """Request compilation of an upload workspace already processing."""
        Compilation = domain.compilation.Compilation
        source_id, checksum = 42, 'asdf1234='
        output_format = Compilation.Format.PDF
        in_progress = Compilation.Status.IN_PROGRESS
        # The POST mock stands in for the response seen after redirection.
        mock_session = mock.MagicMock(
            post=_responding(
                status.OK,
                _status_payload(source_id, checksum, output_format,
                                in_progress)
            )
        )
        mock_Session.return_value = mock_session
        comp_status = compiler.Compiler.compile(source_id, checksum, 'footok',
                                                'theLabel', 'http://the.link')
        self.assertEqual(comp_status.source_id, source_id)
        self.assertEqual(comp_status.identifier,
                         f"{source_id}/{checksum}/{output_format.value}")
        self.assertEqual(comp_status.status, in_progress)
        self.assertEqual(mock_session.post.call_count, 1)


class TestGetTaskStatus(TestCase):
    """Tests for :mod:`compiler.get_status` with mocked responses."""

    def _get_status_reporting(self, mock_Session, state):
        """Mock a 200 status response in ``state`` and check the result."""
        source_id, checksum = 42, 'asdf1234='
        output_format = domain.compilation.Compilation.Format.PDF
        mock_session = mock.MagicMock(
            get=_responding(
                status.OK,
                _status_payload(source_id, checksum, output_format, state)
            )
        )
        mock_Session.return_value = mock_session
        comp_status = compiler.Compiler.get_status(source_id, checksum,
                                                   'tok', output_format)
        self.assertEqual(comp_status.source_id, source_id)
        self.assertEqual(comp_status.identifier,
                         f"{source_id}/{checksum}/{output_format.value}")
        self.assertEqual(comp_status.status, state)
        self.assertEqual(mock_session.get.call_count, 1)

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_get_status_failed(self, mock_Session):
        """Get the status of a failed task."""
        self._get_status_reporting(
            mock_Session, domain.compilation.Compilation.Status.FAILED
        )

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_get_status_in_progress(self, mock_Session):
        """Get the status of an in-progress task."""
        self._get_status_reporting(
            mock_Session, domain.compilation.Compilation.Status.IN_PROGRESS
        )

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_get_status_completed(self, mock_Session):
        """Get the status of a completed task."""
        self._get_status_reporting(
            mock_Session, domain.compilation.Compilation.Status.SUCCEEDED
        )

    @mock.patch('arxiv.integration.api.service.current_app', mock_app)
    @mock.patch('arxiv.integration.api.service.requests.Session')
    def test_get_status_doesnt_exist(self, mock_Session):
        """Get the status of a task that does not exist."""
        source_id, checksum = 42, 'asdf1234='
        output_format = domain.compilation.Compilation.Format.PDF
        mock_Session.return_value = mock.MagicMock(
            get=_responding(status.NOT_FOUND, {})
        )
        with self.assertRaises(exceptions.NotFound):
            compiler.Compiler.get_status(source_id, checksum, 'footok',
                                         output_format)
class ExtractionFailed(exceptions.RequestFailed):
    """The plain text extraction service failed to extract text."""


class ExtractionInProgress(exceptions.RequestFailed):
    """An extraction is already in progress."""


class PlainTextService(service.HTTPIntegration):
    """Represents an interface to the plain text extraction service."""

    VERSION = 0.3
    """Version of the service for which this module is implemented."""

    class Meta:
        """Configuration for :class:`PlainTextService`."""

        service_name = "plaintext"

    class Status(Enum):
        """Task statuses."""

        IN_PROGRESS = 'in_progress'
        SUCCEEDED = 'succeeded'
        FAILED = 'failed'

    @property
    def _base_endpoint(self) -> str:
        """Compose ``scheme://host:port`` from the instance attributes."""
        return f'{self._scheme}://{self._host}:{self._port}'

    def is_available(self, **kwargs: Any) -> bool:
        """
        Check our connection to the plain text service.

        Parameters
        ----------
        timeout : float
            Optional keyword argument; seconds to wait for the status
            request. Defaults to ``0.2``.

        Returns
        -------
        bool
            ``True`` only if a HEAD request to ``/status`` completes and
            returns 200 OK.

        """
        timeout: float = kwargs.get('timeout', 0.2)
        try:
            response = self.request('head', '/status', timeout=timeout)
        except Exception as e:
            # Deliberately broad: any failure means "not available".
            logger.error('Encountered error calling plain text service: %s', e)
            return False
        if response.status_code != status.OK:
            logger.error('Got unexpected status: %s', response.status_code)
            return False
        return True

    def endpoint(self, source_id: str) -> str:
        """Get the URL of the extraction endpoint."""
        return f'/submission/{source_id}'

    def status_endpoint(self, source_id: str) -> str:
        """Get the URL of the extraction status endpoint."""
        return f'/submission/{source_id}/status'

    def request_extraction(self, source_id: str) -> None:
        """
        Make a request for plaintext extraction using the submission upload ID.

        Parameters
        ----------
        source_id : str
            ID of the submission upload workspace.

        Raises
        ------
        :class:`ExtractionInProgress`
            Raised if the service answers 303 See Other, i.e. an extraction
            for this upload already exists.
        :class:`RequestFailed`
            Raised if an unexpected status was encountered.

        """
        expected_code = [status.OK, status.ACCEPTED,
                         status.SEE_OTHER]
        response = self.request('post', self.endpoint(source_id),
                                expected_code=expected_code)
        if response.status_code == status.SEE_OTHER:
            raise ExtractionInProgress('Extraction already exists', response)
        elif response.status_code not in expected_code:
            # Defensive re-check of the status against the expected codes.
            raise exceptions.RequestFailed('Unexpected status', response)
        return

    def extraction_is_complete(self, source_id: str) -> bool:
        """
        Check the status of an extraction task by submission upload ID.

        Parameters
        ----------
        source_id : str
            ID of the submission upload workspace.

        Returns
        -------
        bool
            ``True`` if the status endpoint redirects (303 See Other);
            ``False`` if the task is still in progress.

        Raises
        ------
        :class:`ExtractionFailed`
            Raised if the task is in a failed state, or an unexpected condition
            is encountered.

        """
        endpoint = self.status_endpoint(source_id)
        expected_code = [status.OK, status.SEE_OTHER]
        response = self.request('get', endpoint, allow_redirects=False,
                                expected_code=expected_code)
        # Check for the redirect before touching the body: a 303 response is
        # not guaranteed to carry JSON, and parsing it could raise.
        if response.status_code == status.SEE_OTHER:
            return True
        try:
            state = self.Status(response.json()['status'])
        except (KeyError, TypeError, ValueError) as exc:
            # Unparseable body, missing ``status`` key, or unknown status
            # value: report it as the documented exception type.
            raise ExtractionFailed('Unexpected state', response) from exc
        if state is self.Status.IN_PROGRESS:
            return False
        if state is self.Status.FAILED:
            raise ExtractionFailed('Extraction failed', response)
        # Any other state on a non-redirect response is not expected here.
        raise ExtractionFailed('Unexpected state', response)

    def retrieve_content(self, source_id: str) -> bytes:
        """
        Retrieve plain text content by submission upload ID.

        Parameters
        ----------
        source_id : str
            ID of the submission upload workspace.

        Returns
        -------
        bytes
            Raw text content.

        Raises
        ------
        :class:`RequestFailed`
            Raised if an unexpected status was encountered.
        :class:`ExtractionInProgress`
            Raised if an extraction is currently in progress

        """
        expected_code = [status.OK, status.SEE_OTHER]
        response = self.request('get', self.endpoint(source_id),
                                expected_code=expected_code)
        if response.status_code == status.SEE_OTHER:
            raise ExtractionInProgress('Extraction is in progress', response)
        return response.content
status_code=status.ACCEPTED, + json=mock.MagicMock(return_value={}), + content='', + headers={'Location': '/somewhere'} + ) + ), + 'get': mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + json=mock.MagicMock( + return_value={'reason': 'extraction in process'} + ), + content="{'reason': 'fulltext extraction in process'}", + headers={} + ) + ) + }) + mock_Session.return_value = mock_session + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + self.assertIsNone(service.request_extraction(source_id)) + self.assertEqual( + mock_session.post.call_args[0][0], + 'http://foohost:8123/submission/132456' + ) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction_bad_request(self, mock_Session): + """Service returns 400 Bad Request.""" + mock_Session.return_value = mock.MagicMock( + post=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.BAD_REQUEST, + json=mock.MagicMock(return_value={ + 'reason': 'something is not quite right' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.BadRequest): + service.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction_server_error(self, mock_Session): + """Service returns 500 Internal Server Error.""" + mock_Session.return_value = mock.MagicMock( + post=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.INTERNAL_SERVER_ERROR, + json=mock.MagicMock(return_value={ + 'reason': 'something is not quite right' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestFailed): + service.request_extraction(source_id) + + 
@mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = mock.MagicMock( + post=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.UNAUTHORIZED, + json=mock.MagicMock(return_value={ + 'reason': 'who are you' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestUnauthorized): + service.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction_forbidden(self, mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = mock.MagicMock( + post=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.FORBIDDEN, + json=mock.MagicMock(return_value={ + 'reason': 'you do not have sufficient authz' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestForbidden): + service.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_is_complete(self, mock_Session): + """Extraction is indeed complete.""" + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.SEE_OTHER, + json=mock.MagicMock(return_value={}), + headers={'Location': '...'} + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + self.assertTrue(service.extraction_is_complete(source_id)) + self.assertEqual( + mock_get.call_args[0][0], + 'http://foohost:8123/submission/132456/status' + ) + + 
@mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_in_progress(self, mock_Session): + """Extraction is still in progress.""" + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + json=mock.MagicMock(return_value={'status': 'in_progress'}) + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + self.assertFalse(service.extraction_is_complete(source_id)) + self.assertEqual( + mock_get.call_args[0][0], + 'http://foohost:8123/submission/132456/status' + ) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_failed(self, mock_Session): + """Extraction failed.""" + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + json=mock.MagicMock(return_value={'status': 'failed'}) + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + with self.assertRaises(plaintext.ExtractionFailed): + self.assertFalse(service.extraction_is_complete(source_id)) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_complete_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = mock.MagicMock( + get=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.UNAUTHORIZED, + json=mock.MagicMock(return_value={ + 'reason': 'who are you' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestUnauthorized): + service.extraction_is_complete(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', 
mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_complete_forbidden(self, mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = mock.MagicMock( + get=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.FORBIDDEN, + json=mock.MagicMock(return_value={ + 'reason': 'you do not have sufficient authz' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestForbidden): + service.extraction_is_complete(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = mock.MagicMock( + get=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.UNAUTHORIZED, + json=mock.MagicMock(return_value={ + 'reason': 'who are you' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestUnauthorized): + service.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_forbidden(self, mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = mock.MagicMock( + get=mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.FORBIDDEN, + json=mock.MagicMock(return_value={ + 'reason': 'you do not have sufficient authz' + }) + ) + ) + ) + source_id = '132456' + service = plaintext.PlainTextService('foohost', 8000) + with self.assertRaises(exceptions.RequestForbidden): + service.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve(self, mock_Session): 
+ """Retrieval is successful.""" + content = b'thisisthecontent' + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + content=content + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + self.assertEqual(service.retrieve_content(source_id), content, + "Returns binary content as received") + self.assertEqual( + mock_get.call_args[0][0], + 'http://foohost:8123/submission/132456' + ) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_nonexistant(self, mock_Session): + """There is no such plaintext resource.""" + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.NOT_FOUND, + json=mock.MagicMock(return_value={'reason': 'no such thing'}) + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + with self.assertRaises(exceptions.NotFound): + service.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_in_progress(self, mock_Session): + """There is no such plaintext resource.""" + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.SEE_OTHER, + json=mock.MagicMock(return_value={}), + headers={'Location': '...'} + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + service = plaintext.PlainTextService('http://foohost:8123') + with self.assertRaises(plaintext.ExtractionInProgress): + service.retrieve_content(source_id) + + +class TestPlainTextServiceModule(TestCase): + """Tests for :mod:`.services.plaintext`.""" + + def session(self, status_code=status.OK, method="get", json={}, + content="", headers={}): + """Make a mock 
session.""" + return mock.MagicMock(**{ + method: mock.MagicMock( + return_value=mock.MagicMock( + status_code=status_code, + json=mock.MagicMock( + return_value=json + ), + content=content, + headers=headers + ) + ) + }) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_already_in_progress(self, mock_Session): + """A plaintext extraction is already in progress.""" + mock_Session.return_value = self.session( + status_code=status.SEE_OTHER, + method='post', + headers={'Location': '...'} + ) + + source_id = '132456' + with self.assertRaises(plaintext.ExtractionInProgress): + plaintext.PlainTextService.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction(self, mock_Session): + """Extraction is successfully requested.""" + mock_session = mock.MagicMock(**{ + 'post': mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.ACCEPTED, + json=mock.MagicMock(return_value={}), + content='', + headers={'Location': '/somewhere'} + ) + ), + 'get': mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + json=mock.MagicMock( + return_value={'reason': 'extraction in process'} + ), + content="{'reason': 'fulltext extraction in process'}", + headers={} + ) + ) + }) + mock_Session.return_value = mock_session + source_id = '132456' + self.assertIsNone( + plaintext.PlainTextService.request_extraction(source_id) + ) + self.assertEqual(mock_session.post.call_args[0][0], + 'http://foohost:5432/submission/132456') + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_bad_request(self, mock_Session): + """Service returns 400 Bad Request.""" + mock_Session.return_value = self.session( + status_code=status.BAD_REQUEST, + 
method='post', + json={'reason': 'something is not quite right'} + ) + source_id = '132456' + with self.assertRaises(exceptions.BadRequest): + plaintext.PlainTextService.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_server_error(self, mock_Session): + """Service returns 500 Internal Server Error.""" + mock_Session.return_value = self.session( + status_code=status.INTERNAL_SERVER_ERROR, + method='post', + json={'reason': 'something is not quite right'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestFailed): + plaintext.PlainTextService.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = self.session( + status_code=status.UNAUTHORIZED, + method='post', + json={'reason': 'who are you'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestUnauthorized): + plaintext.PlainTextService.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_request_extraction_forbidden(self, mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = self.session( + status_code=status.FORBIDDEN, + method='post', + json={'reason': 'you do not have sufficient authz'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestForbidden): + plaintext.PlainTextService.request_extraction(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_is_complete(self, mock_Session): + """Extraction is indeed complete.""" + 
mock_session = self.session( + status_code=status.SEE_OTHER, + headers={'Location': '...'} + ) + mock_Session.return_value = mock_session + source_id = '132456' + self.assertTrue(plaintext.PlainTextService.extraction_is_complete(source_id)) + self.assertEqual(mock_session.get.call_args[0][0], + 'http://foohost:5432/submission/132456/status') + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_in_progress(self, mock_Session): + """Extraction is still in progress.""" + mock_session = self.session( + json={'status': 'in_progress'} + ) + mock_Session.return_value = mock_session + source_id = '132456' + self.assertFalse(plaintext.PlainTextService.extraction_is_complete(source_id)) + self.assertEqual(mock_session.get.call_args[0][0], + 'http://foohost:5432/submission/132456/status') + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_extraction_failed(self, mock_Session): + """Extraction failed.""" + mock_Session.return_value = self.session(json={'status': 'failed'}) + source_id = '132456' + with self.assertRaises(plaintext.ExtractionFailed): + self.assertFalse(plaintext.PlainTextService.extraction_is_complete(source_id)) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_complete_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = self.session( + status_code=status.UNAUTHORIZED, + json={'reason': 'who are you'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestUnauthorized): + plaintext.PlainTextService.extraction_is_complete(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_complete_forbidden(self, 
mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = self.session( + status_code=status.FORBIDDEN, + json={'reason': 'you do not have sufficient authz'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestForbidden): + plaintext.PlainTextService.extraction_is_complete(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_unauthorized(self, mock_Session): + """Service returns 401 Unauthorized.""" + mock_Session.return_value = self.session( + status_code=status.UNAUTHORIZED, + json={'reason': 'who are you'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestUnauthorized): + plaintext.PlainTextService.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_forbidden(self, mock_Session): + """Service returns 403 Forbidden.""" + mock_Session.return_value = self.session( + status_code=status.FORBIDDEN, + json={'reason': 'you do not have sufficient authz'} + ) + source_id = '132456' + with self.assertRaises(exceptions.RequestForbidden): + plaintext.PlainTextService.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve(self, mock_Session): + """Retrieval is successful.""" + content = b'thisisthecontent' + mock_get = mock.MagicMock( + return_value=mock.MagicMock( + status_code=status.OK, + content=content + ) + ) + mock_Session.return_value = mock.MagicMock(get=mock_get) + source_id = '132456' + self.assertEqual( + plaintext.PlainTextService.retrieve_content(source_id), + content, + "Returns binary content as received" + ) + self.assertEqual(mock_get.call_args[0][0], + 'http://foohost:5432/submission/132456') + + 
@mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_nonexistant(self, mock_Session): + """There is no such plaintext resource.""" + mock_Session.return_value = self.session( + status_code=status.NOT_FOUND, + json={'reason': 'no such thing'} + ) + source_id = '132456' + with self.assertRaises(exceptions.NotFound): + plaintext.PlainTextService.retrieve_content(source_id) + + @mock.patch('arxiv.integration.api.service.current_app', mock_app) + @mock.patch('arxiv.integration.api.service.requests.Session') + def test_retrieve_in_progress(self, mock_Session): + """There is no such plaintext resource.""" + mock_Session.return_value = self.session( + status_code=status.SEE_OTHER, + headers={'Location': '...'} + ) + source_id = '132456' + with self.assertRaises(plaintext.ExtractionInProgress): + plaintext.PlainTextService.retrieve_content(source_id) diff --git a/core/arxiv/submission/services/stream/__init__.py b/core/arxiv/submission/services/stream/__init__.py new file mode 100644 index 0000000..f7fcf5b --- /dev/null +++ b/core/arxiv/submission/services/stream/__init__.py @@ -0,0 +1,3 @@ +"""Emits events to the submission stream.""" + +from .stream import StreamPublisher diff --git a/core/arxiv/submission/services/stream/stream.py b/core/arxiv/submission/services/stream/stream.py new file mode 100644 index 0000000..348e4dd --- /dev/null +++ b/core/arxiv/submission/services/stream/stream.py @@ -0,0 +1,126 @@ +from typing import Optional + +import boto3 +from botocore.exceptions import ClientError +from retry import retry + +from arxiv.integration.meta import MetaIntegration +from arxiv.base import logging +from arxiv.base.globals import get_application_config, get_application_global + +from ...domain import Submission, Event +from ...serializer import dumps + +logger = logging.getLogger(__name__) + + +class StreamPublisher(metaclass=MetaIntegration): + def __init__(self, 
stream: str, partition_key: str, + aws_access_key_id: str, aws_secret_access_key: str, + region_name: str, endpoint_url: Optional[str] = None, + verify: bool = True) -> None: + self.stream = stream + self.partition_key = partition_key + self.client = boto3.client('kinesis', + region_name=region_name, + endpoint_url=endpoint_url, + aws_access_key_id=aws_access_key_id, + aws_secret_access_key=aws_secret_access_key, + verify=verify) + + @classmethod + def init_app(cls, app: object = None) -> None: + """Set default configuration params for an application instance.""" + config = get_application_config(app) + config.setdefault('AWS_ACCESS_KEY_ID', '') + config.setdefault('AWS_SECRET_ACCESS_KEY', '') + config.setdefault('AWS_REGION', 'us-east-1') + config.setdefault('KINESIS_ENDPOINT', None) + config.setdefault('KINESIS_VERIFY', True) + config.setdefault('KINESIS_STREAM', 'SubmissionEvents') + config.setdefault('KINESIS_PARTITION_KEY', '0') + + @classmethod + def get_session(cls, app: object = None) -> 'StreamPublisher': + """Get a new session with the stream.""" + config = get_application_config(app) + aws_access_key_id = config['AWS_ACCESS_KEY_ID'] + aws_secret_access_key = config['AWS_SECRET_ACCESS_KEY'] + aws_region = config['AWS_REGION'] + kinesis_endpoint = config['KINESIS_ENDPOINT'] + kinesis_verify = config['KINESIS_VERIFY'] + kinesis_stream = config['KINESIS_STREAM'] + partition_key = config['KINESIS_PARTITION_KEY'] + return cls(kinesis_stream, partition_key, aws_access_key_id, + aws_secret_access_key, aws_region, kinesis_endpoint, + kinesis_verify) + + @classmethod + def current_session(cls) -> 'StreamPublisher': + """Get/create :class:`.StreamPublisher` for this context.""" + g = get_application_global() + if not g: + return cls.get_session() + elif 'stream' not in g: + g.stream = cls.get_session() # type: ignore + return g.stream # type: ignore + + def is_available(self, **kwargs) -> bool: + """Test our ability to put records.""" + data = bytes(dumps({}), 
encoding='utf-8') + try: + self.client.put_record(StreamName=self.stream, Data=data, + PartitionKey=self.partition_key) + except Exception as e: + logger.error('Encountered error while putting to stream: %s', e) + return False + return True + + def _create_stream(self) -> None: + try: + self.client.create_stream(StreamName=self.stream, ShardCount=1) + except self.client.exceptions.ResourceInUseException: + logger.info('Stream %s already exists', self.stream) + return + + def _wait_for_stream(self, retries: int = 0, delay: int = 0) -> None: + waiter = self.client.get_waiter('stream_exists') + waiter.wait( + StreamName=self.stream, + WaiterConfig={ + 'Delay': delay, + 'MaxAttempts': retries + } + ) + + @retry(RuntimeError, tries=5, delay=2, backoff=2) + def initialize(self) -> None: + """Perform initial checks, e.g. at application start-up.""" + logger.info('initialize Kinesis stream') + data = bytes(dumps({}), encoding='utf-8') + try: + self.client.put_record(StreamName=self.stream, Data=data, + PartitionKey=self.partition_key) + logger.info('storage service is already available') + except ClientError as exc: + if exc.response['Error']['Code'] == 'ResourceNotFoundException': + logger.info('stream does not exist; creating') + self._create_stream() + logger.info('wait for stream to be available') + self._wait_for_stream(retries=10, delay=5) + raise RuntimeError('Failed to initialize stream') from exc + except self.client.exceptions.ResourceNotFoundException: + logger.info('stream does not exist; creating') + self._create_stream() + logger.info('wait for stream to be available') + self._wait_for_stream(retries=10, delay=5) + except Exception as exc: + raise RuntimeError('Failed to initialize stream') from exc + return + + def put(self, event: Event, before: Submission, after: Submission) -> None: + """Put an :class:`.Event` on the stream.""" + payload = {'event': event, 'before': before, 'after': after} + data = bytes(dumps(payload), encoding='utf-8') + 
self.client.put_record(StreamName=self.stream, Data=data, + PartitionKey=self.partition_key) diff --git a/core/arxiv/submission/templates/submission-core/confirmation-email.html b/core/arxiv/submission/templates/submission-core/confirmation-email.html new file mode 100644 index 0000000..364ad7e --- /dev/null +++ b/core/arxiv/submission/templates/submission-core/confirmation-email.html @@ -0,0 +1,28 @@ +{% extends "mail/base.html" %} + +{% block email_title %}arXiv submission submit/{{ submission_id }}{% endblock email_title %} + +{% block message_title %}We have received your submission to arXiv, titled "{{ submission.metadata.title }}"{% endblock message_title %} + +{% block message_body %} +

+ Your temporary submission identifier is: submit/{{ submission_id }}. + To preview your submission, check the + submission status page. +

+ +

+ Your article is scheduled to be announced at {{ announce_time.strftime("%a, %-d %b %Y %H:%M:%S ET") }}. The abstract + will appear in the subsequent mailing as displayed below, except that the + submission identifier will be replaced by the official arXiv identifier. + Updates before {{ freeze_time.strftime("%a, %-d %b %Y %H:%M:%S ET") }} will not delay announcement. +

+ +

+ A paper password will be emailed to you when the article is announced. You + should share this with co-authors to allow them to claim ownership. If you + have a problem that you are not able to resolve through the web interface, + contact {{ config.SUPPORT_EMAIL }} with a + description of the issue and reference the submission identifier. +

+{% endblock message_body %} diff --git a/core/arxiv/submission/templates/submission-core/confirmation-email.txt b/core/arxiv/submission/templates/submission-core/confirmation-email.txt new file mode 100644 index 0000000..9db073c --- /dev/null +++ b/core/arxiv/submission/templates/submission-core/confirmation-email.txt @@ -0,0 +1,38 @@ +{% import "base/macros.html" as macros %} + +We have received your submission to arXiv. + +Your temporary submission identifier is: submit/{{ submission_id }}. You may +update your submission at: {{ url_for("submission", +submission_id=submission_id) }}. + +Your article is scheduled to be announced at {{ announce_time.strftime("%a, %-d %b %Y %H:%M:%S ET") }}. The +abstract will appear in the subsequent mailing as displayed below, except that +the submission identifier will be replaced by the official arXiv identifier. +Updates before {{ freeze_time.strftime("%a, %-d %b %Y %H:%M:%S ET") }} will not delay announcement. + +A paper password will be emailed to you when the article is announced. You +should share this with co-authors to allow them to claim ownership. If you have +a problem that you are not able to resolve through the web interface, contact +{{ config.SUPPORT_EMAIL }} with a description of the issue and reference the +submission identifier. 
+ +{{ macros.abs_plaintext( + arxiv_id, + submission.metadata.title, + submission.metadata.authors_display, + submission.metadata.abstract, + submission.created, + submission.primary_classification.category, + submission.creator.name, + submission.creator.email, + submission.source_content.uncompressed_size, + submission.license.uri, + comments = submission.metadata.comments, + msc_class = submission.metadata.msc_class, + acm_class = submission.metadata.acm_class, + journal_ref = submission.metadata.journal_ref, + report_num = submission.metadata.report_num, + version = submission.version, + submission_history = [], + secondary_categories = submission.secondary_categories) }} diff --git a/core/events/tests/__init__.py b/core/arxiv/submission/tests/__init__.py similarity index 100% rename from core/events/tests/__init__.py rename to core/arxiv/submission/tests/__init__.py diff --git a/core/arxiv/submission/tests/annotations/__init__.py b/core/arxiv/submission/tests/annotations/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/arxiv/submission/tests/api/__init__.py b/core/arxiv/submission/tests/api/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/events/tests/test_api.py b/core/arxiv/submission/tests/api/test_api.py similarity index 50% rename from core/events/tests/test_api.py rename to core/arxiv/submission/tests/api/test_api.py index 0f11231..8d50d92 100644 --- a/core/events/tests/test_api.py +++ b/core/arxiv/submission/tests/api/test_api.py @@ -5,33 +5,32 @@ from collections import defaultdict from datetime import datetime, timedelta from flask import Flask -from events import save, load, Submission, User, Event, UpdateMetadata, \ - EventRule, RuleCondition, RuleConsequence, CreateComment, \ - SubmissionMetadata, CreateSubmission, UpdateAuthors, Author -from events.exceptions import NoSuchSubmission, InvalidEvent -from events.services import classic +from pytz import UTC +from ... 
import save, load, core, Submission, User, Event, \ + SubmissionMetadata, CreateSubmission, SetAuthors, Author, \ + SetTitle, SetAbstract +from ...exceptions import NoSuchSubmission, InvalidEvent +from ...services import classic -def mock_store_events(*events, submission): - """Mock for :func:`events.services.database.store_events`.""" - if submission.submission_id is None: - submission.submission_id = 1 - for event in events: - event.committed = True - event.submission_id = submission.submission_id - return submission +def mock_store_event(event, before, after, emit): + event.submission_id = 1 + after.submission_id = 1 + event.committed = True + emit(event) + return event, after class TestLoad(TestCase): """Test :func:`.load`.""" - @mock.patch('events.classic') + @mock.patch('submission.core.classic') def test_load_existant_submission(self, mock_classic): """When the submission exists, submission and events are returned.""" u = User(12345, 'joe@joe.joe') mock_classic.get_submission.return_value = ( Submission(creator=u, submission_id=1, owner=u, - created=datetime.now()), + created=datetime.now(UTC)), [CreateSubmission(creator=u, submission_id=1, committed=True)] ) submission, events = load(1) @@ -43,7 +42,7 @@ def test_load_existant_submission(self, mock_classic): self.assertIsInstance(events[0], Event, "A list of events should be returned") - @mock.patch('events.classic') + @mock.patch('submission.core.classic') def test_load_nonexistant_submission(self, mock_classic): """When the submission does not exist, an exception is raised.""" mock_classic.get_submission.side_effect = classic.NoSuchSubmission @@ -55,10 +54,12 @@ def test_load_nonexistant_submission(self, mock_classic): class TestSave(TestCase): """Test :func:`.save`.""" - @mock.patch('events.classic') - def test_save_creation_event(self, mock_database): + @mock.patch(f'{core.__name__}.StreamPublisher') + @mock.patch('submission.core.classic') + def test_save_creation_event(self, mock_database, 
mock_publisher): """A :class:`.CreationEvent` is passed.""" - mock_database.store_events = mock_store_events + mock_database.store_event = mock_store_event + mock_database.exceptions = classic.exceptions user = User(12345, 'joe@joe.joe') event = CreateSubmission(creator=user) submission, events = save(event) @@ -71,76 +72,102 @@ def test_save_creation_event(self, mock_database): self.assertIsNotNone(submission.submission_id, "Submission ID should be set.") - @mock.patch('events.classic') - def test_save_events_from_scratch(self, mock_database): + self.assertEqual(mock_publisher.put.call_count, 1) + # Check the event passed to put(), as the other tests here do. + self.assertEqual(mock_publisher.put.mock_calls[0][1][0], event) + + @mock.patch(f'{core.__name__}.StreamPublisher') + @mock.patch('submission.core.classic') + def test_save_events_from_scratch(self, mock_database, mock_publisher): """Save multiple events for a nonexistant submission.""" - mock_database.store_events = mock_store_events + mock_database.store_event = mock_store_event + mock_database.exceptions = classic.exceptions user = User(12345, 'joe@joe.joe') e = CreateSubmission(creator=user) - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) + e2 = SetTitle(creator=user, title='footitle') submission, events = save(e, e2) - self.assertEqual(submission.metadata.title, 'foo') + self.assertEqual(submission.metadata.title, 'footitle') self.assertIsInstance(submission.submission_id, int) self.assertEqual(submission.created, e.created) - @mock.patch('events.classic') - def test_create_and_update_authors(self, mock_database): + self.assertEqual(mock_publisher.put.call_count, 2) + self.assertEqual(mock_publisher.put.mock_calls[0][1][0], e) + self.assertEqual(mock_publisher.put.mock_calls[1][1][0], e2) + + @mock.patch(f'{core.__name__}.StreamPublisher') + @mock.patch('submission.core.classic') + def test_create_and_update_authors(self, mock_database, mock_publisher): """Save multiple events for a nonexistant submission.""" - 
mock_database.store_events = mock_store_events + mock_database.store_event = mock_store_event + mock_database.exceptions = classic.exceptions user = User(12345, 'joe@joe.joe') e = CreateSubmission(creator=user) - e2 = UpdateAuthors(creator=user, authors=[ + e2 = SetAuthors(creator=user, authors=[ Author(0, forename='Joe', surname="Bloggs", email="joe@blog.gs") ]) submission, events = save(e, e2) self.assertIsInstance(submission.metadata.authors[0], Author) - @mock.patch('events.classic') + self.assertEqual(mock_publisher.put.call_count, 2) + self.assertEqual(mock_publisher.put.mock_calls[0][1][0], e) + self.assertEqual(mock_publisher.put.mock_calls[1][1][0], e2) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + @mock.patch('submission.core.classic') def test_save_from_scratch_without_creation_event(self, mock_database): """An exception is raised when there is no creation event.""" - mock_database.store_events = mock_store_events + mock_database.store_event = mock_store_event user = User(12345, 'joe@joe.joe') - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) + e2 = SetTitle(creator=user, title='foo') with self.assertRaises(NoSuchSubmission): save(e2) - @mock.patch('events.classic') - def test_save_events_on_existing_submission(self, mock_db): + @mock.patch(f'{core.__name__}.StreamPublisher') + @mock.patch('submission.core.classic') + def test_save_events_on_existing_submission(self, mock_db, mock_publisher): """Save multiple sets of events in separate calls to :func:`.save`.""" - cache = defaultdict(list) - - def mock_store_events_with_cache(*events, submission): - if submission.submission_id is None: - submission.submission_id = 1 - for event in events: - event.committed = True - event.submission_id = submission.submission_id - cache[event.submission_id].append(event) - return submission - - def mock_get_events(submission_id): + mock_db.exceptions = classic.exceptions + cache = {} + + def mock_store_event_with_cache(event, 
before, after, emit): + if after.submission_id is None: + if before is not None: + before.submission_id = 1 + after.submission_id = 1 + + event.committed = True + event.submission_id = after.submission_id + if event.submission_id not in cache: + cache[event.submission_id] = (None, []) + cache[event.submission_id] = ( + after, cache[event.submission_id][1] + [event] + ) + emit(event) + return event, after + + def mock_get_events(submission_id, *args, **kwargs): return cache[submission_id] - mock_db.store_events = mock_store_events_with_cache - mock_db.get_events = mock_get_events + mock_db.store_event = mock_store_event_with_cache + mock_db.get_submission = mock_get_events # Here is the first set of events. user = User(12345, 'joe@joe.joe') e = CreateSubmission(creator=user) - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) + e2 = SetTitle(creator=user, title='footitle') submission, _ = save(e, e2) submission_id = submission.submission_id # Now we apply a second set of events. - e3 = UpdateMetadata(creator=user, metadata=[['abstract', 'bar']]) + e3 = SetAbstract(creator=user, abstract='bar'*10) submission2, _ = save(e3, submission_id=submission_id) # The submission state reflects all three events. - self.assertEqual(submission2.metadata.abstract, 'bar', + self.assertEqual(submission2.metadata.abstract, 'bar'*10, "State of the submission should reflect both sets" " of events.") - self.assertEqual(submission2.metadata.title, 'foo', + self.assertEqual(submission2.metadata.title, 'footitle', "State of the submission should reflect both sets" " of events.") self.assertEqual(submission2.created, e.created, @@ -149,39 +176,7 @@ def mock_get_events(submission_id): self.assertEqual(submission2.submission_id, submission_id, "The submission ID should remain the same.") - # TODO: restore this when rules are implemented. 
- # - # @mock.patch('events.classic') - # def test_apply_events_with_rules(self, mock_db): - # """Save a set of events for which some rules apply.""" - # # Given the following rule... - # def mock_get_rules_for_submission(submission_id): - # return [ - # # If the metadata of any submission was updated, add a comment. - # EventRule( - # rule_id=1, - # creator=User('foo'), - # condition=RuleCondition( - # event_type=UpdateMetadata, - # extra_condition={} - # ), - # consequence=RuleConsequence( - # event_creator=User('foo'), - # event_type=CreateCommentEvent, - # event_data={ - # 'body': 'The metadata was updated', - # 'scope': 'private' - # } - # ) - # ) - # ] - # mock_db.get_rules = mock_get_rules_for_submission - # mock_db.store_events = mock_store_events - # e = CreateSubmission(creator=User('foo')) - # e2 = UpdateMetadata(creator=User('foo'), - # metadata=[['title', 'foo']]) - # submission, events = save(e, e2) - # self.assertEqual(len(submission.comments), 1, - # "A comment should be added to the submission.") - # self.assertEqual(len(events), 3, - # "A third event is added to the stack.") + self.assertEqual(mock_publisher.put.call_count, 3) + self.assertEqual(mock_publisher.put.mock_calls[0][1][0], e) + self.assertEqual(mock_publisher.put.mock_calls[1][1][0], e2) + self.assertEqual(mock_publisher.put.mock_calls[2][1][0], e3) diff --git a/core/arxiv/submission/tests/classic/__init__.py b/core/arxiv/submission/tests/classic/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/arxiv/submission/tests/classic/test_classic_integration.py b/core/arxiv/submission/tests/classic/test_classic_integration.py new file mode 100644 index 0000000..732cac3 --- /dev/null +++ b/core/arxiv/submission/tests/classic/test_classic_integration.py @@ -0,0 +1,1053 @@ +""" +Tests for integration with the classic system. + +Provides test cases for the new events model's ability to replicate the classic +model. 
The function `TestClassicUIWorkflow.test_classic_workflow()` provides +keyword arguments to pass different types of data through the workflow. + +TODO: Presently, `test_classic_workflow` expects `core.domain` objects. That +should change to instantiate each object at runtime for database imports. +""" + +from unittest import TestCase, mock +from datetime import datetime +import tempfile +from pytz import UTC +from flask import Flask + +from arxiv.base import Base +from arxiv import mail +from ..util import in_memory_db +from ... import * +from ...services import classic + + +class TestClassicUIWorkflow(TestCase): + """Replicate the classic submission UI workflow.""" + + def setUp(self): + """An arXiv user is submitting a new paper.""" + self.app = Flask(__name__) + self.app.config['EMAIL_ENABLED'] = False + self.app.config['WAIT_FOR_SERVICES'] = False + Base(self.app) + init_app(self.app) + mail.init_app(self.app) + self.submitter = domain.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.unicode_submitter = domain.User(12345, + email='j.user@somewhere.edu', + forename='大', surname='用户', + endorsements=['cs.DL', 'cs.IR']) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_classic_workflow(self, submitter=None, metadata=None, + authors=None): + """Submitter proceeds through workflow in a linear fashion.""" + + # Instantiate objects that have not yet been instantiated or use defaults. + if submitter is None: + submitter = self.submitter + if metadata is None: + metadata = [ + ('title', 'Foo title'), + ('abstract', "One morning, as Gregor Samsa was waking up..."), + ('comments', '5 pages, 2 turtle doves'), + ('report_num', 'asdf1234'), + ('doi', '10.01234/56789'), + ('journal_ref', 'Foo Rev 1, 2 (1903)') + ] + metadata = dict(metadata) + + + # TODO: Process data in dictionary form to Author objects. 
+ if authors is None: + authors = [Author(order=0, + forename='Bob', + surname='Paulson', + email='Robert.Paulson@nowhere.edu', + affiliation='Fight Club' + )] + + with in_memory_db(self.app) as session: + # Submitter clicks on 'Start new submission' in the user dashboard. + submission, stack = save( + CreateSubmission(creator=submitter) + ) + self.assertIsNotNone(submission.submission_id, + "A submission ID is assigned") + self.assertEqual(len(stack), 1, "A single command is executed.") + + db_submission = session.query(classic.models.Submission)\ + .get(submission.submission_id) + self.assertEqual(db_submission.submission_id, + submission.submission_id, + "A row is added to the submission table") + self.assertEqual(db_submission.submitter_id, + submitter.native_id, + "Submitter ID set on submission") + self.assertEqual(db_submission.submitter_email, + submitter.email, + "Submitter email set on submission") + self.assertEqual(db_submission.submitter_name, submitter.name, + "Submitter name set on submission") + self.assertEqual(db_submission.created.replace(tzinfo=UTC), + submission.created, + "Creation datetime set correctly") + + # TODO: What else to check here? + + # /start: Submitter completes the start submission page. 
+ license_uri = 'http://creativecommons.org/publicdomain/zero/1.0/' + submission, stack = save( + ConfirmContactInformation(creator=submitter), + ConfirmAuthorship( + creator=submitter, + submitter_is_author=True + ), + SetLicense( + creator=submitter, + license_uri=license_uri, + license_name='CC0 1.0' + ), + ConfirmPolicy(creator=submitter), + SetPrimaryClassification( + creator=submitter, + category='cs.DL' + ), + submission_id=submission.submission_id + ) + + self.assertEqual(len(stack), 6, + "Six commands have been executed in total.") + + db_submission = session.query(classic.models.Submission)\ + .get(submission.submission_id) + self.assertEqual(db_submission.userinfo, 1, + "Contact verification set correctly in database.") + self.assertEqual(db_submission.is_author, 1, + "Authorship status set correctly in database.") + self.assertEqual(db_submission.license, license_uri, + "License set correctly in database.") + self.assertEqual(db_submission.agree_policy, 1, + "Policy acceptance set correctly in database.") + self.assertEqual(len(db_submission.categories), 1, + "A single category is associated in the database") + self.assertEqual(db_submission.categories[0].is_primary, 1, + "Primary category is set correct in the database") + self.assertEqual(db_submission.categories[0].category, 'cs.DL', + "Primary category is set correct in the database") + + # /addfiles: Submitter has uploaded files to the file management + # service, and verified that they compile. Now they associate the + # content package with the submission. 
+ submission, stack = save( + SetUploadPackage( + creator=submitter, + checksum="a9s9k342900skks03330029k", + source_format=domain.submission.SubmissionContent.Format('tex'), + identifier=123, + uncompressed_size=593992, + compressed_size=593992 + ), + submission_id=submission.submission_id + ) + + self.assertEqual(len(stack), 7, + "Seven commands have been executed in total.") + db_submission = session.query(classic.models.Submission)\ + .get(submission.submission_id) + self.assertEqual(db_submission.must_process, 1, + "There is no compilation yet") + self.assertEqual(db_submission.source_size, 593992, + "Source package size set correctly in database") + self.assertEqual(db_submission.source_format, 'tex', + "Source format set correctly in database") + + # /metadata: Submitter adds metadata to their submission, including + # authors. In this package, we model authors in more detail than + # in the classic system, but we should preserve the canonical + # format in the db for legacy components' sake. 
+ submission, stack = save( + SetTitle(creator=submitter, title=metadata['title']), + SetAbstract(creator=submitter, + abstract=metadata['abstract']), + SetComments(creator=submitter, + comments=metadata['comments']), + SetJournalReference(creator=submitter, + journal_ref=metadata['journal_ref']), + SetDOI(creator=submitter, doi=metadata['doi']), + SetReportNumber(creator=submitter, + report_num=metadata['report_num']), + SetAuthors(creator=submitter, authors=authors), + submission_id=submission.submission_id + ) + db_submission = session.query(classic.models.Submission) \ + .get(submission.submission_id) + self.assertEqual(db_submission.title, dict(metadata)['title'], + "Title updated as expected in database") + self.assertEqual(db_submission.abstract, + dict(metadata)['abstract'], + "Abstract updated as expected in database") + self.assertEqual(db_submission.comments, + dict(metadata)['comments'], + "Comments updated as expected in database") + self.assertEqual(db_submission.report_num, + dict(metadata)['report_num'], + "Report number updated as expected in database") + self.assertEqual(db_submission.doi, dict(metadata)['doi'], + "DOI updated as expected in database") + self.assertEqual(db_submission.journal_ref, + dict(metadata)['journal_ref'], + "Journal ref updated as expected in database") + + author_str = ';'.join( + [f"{author.forename} {author.surname} ({author.affiliation})" + for author in authors] + ) + self.assertEqual(db_submission.authors, + author_str, + "Authors updated in canonical format in database") + self.assertEqual(len(stack), 14, + "Fourteen commands have been executed in total.") + + # /preview: Submitter adds a secondary classification. 
+ submission, stack = save( + AddSecondaryClassification( + creator=submitter, + category='cs.IR' + ), + submission_id=submission.submission_id + ) + db_submission = session.query(classic.models.Submission)\ + .get(submission.submission_id) + + self.assertEqual(len(db_submission.categories), 2, + "A secondary category is added in the database") + secondaries = [ + db_cat for db_cat in db_submission.categories + if db_cat.is_primary == 0 + ] + self.assertEqual(len(secondaries), 1, + "A secondary category is added in the database") + self.assertEqual(secondaries[0].category, 'cs.IR', + "A secondary category is added in the database") + self.assertEqual(len(stack), 15, + "Fifteen commands have been executed in total.") + + # /preview: Submitter finalizes submission. + finalize = FinalizeSubmission(creator=submitter) + submission, stack = save( + finalize, submission_id=submission.submission_id + ) + db_submission = session.query(classic.models.Submission)\ + .get(submission.submission_id) + + self.assertEqual(db_submission.status, db_submission.SUBMITTED, + "Submission status set correctly in database") + self.assertEqual(db_submission.submit_time.replace(tzinfo=UTC), + finalize.created, + "Submit time is set.") + self.assertEqual(len(stack), 16, + "Sixteen commands have been executed in total.") + + def test_unicode_submitter(self): + """Submitter proceeds through workflow in a linear fashion.""" + submitter = self.unicode_submitter + metadata = [ + ('title', '优秀的称号'), + ('abstract', "当我有一天正在上学的时候当我有一天正在上学的时候"), + ('comments', '5页2龟鸠'), + ('report_num', 'asdf1234'), + ('doi', '10.01234/56789'), + ('journal_ref', 'Foo Rev 1, 2 (1903)') + ] + authors = [Author(order=0, forename='惊人', surname='用户', + email='amazing.user@nowhere.edu', + affiliation='Fight Club')] + with self.app.app_context(): + self.app.config['ENABLE_CALLBACKS'] = 0 + self.test_classic_workflow(submitter=submitter, metadata=metadata, + authors=authors) + + def test_texism_titles(self): + """Submitter 
proceeds through workflow in a linear fashion.""" + metadata = [ + ('title', 'Revisiting $E = mc^2$'), + ('abstract', "$E = mc^2$ is a foundational concept in physics"), + ('comments', '5 pages, 2 turtle doves'), + ('report_num', 'asdf1234'), + ('doi', '10.01234/56789'), + ('journal_ref', 'Foo Rev 1, 2 (1903)') + ] + with self.app.app_context(): + self.app.config['ENABLE_CALLBACKS'] = 1 + self.test_classic_workflow(metadata=metadata) + + +class TestReplacementIntegration(TestCase): + """Test integration with the classic database with replacements.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + cls.app.config['WAIT_FOR_SERVICES'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """An arXiv user is submitting a new paper.""" + self.submitter = domain.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL']) + + # Create and finalize a new submission. 
+ cc0 = 'http://creativecommons.org/publicdomain/zero/1.0/' + with self.app.app_context(): + classic.create_all() + metadata=dict([ + ('title', 'Foo title'), + ('abstract', "One morning, as Gregor Samsa was..."), + ('comments', '5 pages, 2 turtle doves'), + ('report_num', 'asdf1234'), + ('doi', '10.01234/56789'), + ('journal_ref', 'Foo Rev 1, 2 (1903)') + ]) + self.submission, _ = save( + CreateSubmission(creator=self.submitter), + ConfirmContactInformation(creator=self.submitter), + ConfirmAuthorship( + creator=self.submitter, + submitter_is_author=True + ), + SetLicense( + creator=self.submitter, + license_uri=cc0, + license_name='CC0 1.0' + ), + ConfirmPolicy(creator=self.submitter), + SetPrimaryClassification( + creator=self.submitter, + category='cs.DL' + ), + SetUploadPackage( + creator=self.submitter, + checksum="a9s9k342900skks03330029k", + source_format=domain.submission.SubmissionContent.Format('tex'), + identifier=123, + uncompressed_size=593992, + compressed_size=593992 + ), + SetTitle(creator=self.submitter, title=metadata['title']), + SetAbstract(creator=self.submitter, + abstract=metadata['abstract']), + SetComments(creator=self.submitter, + comments=metadata['comments']), + SetJournalReference( + creator=self.submitter, + journal_ref=metadata['journal_ref'] + ), + SetDOI(creator=self.submitter, doi=metadata['doi']), + SetReportNumber(creator=self.submitter, + report_num=metadata['report_num']), + SetAuthors( + creator=self.submitter, + authors=[Author( + order=0, + forename='Bob', + surname='Paulson', + email='Robert.Paulson@nowhere.edu', + affiliation='Fight Club' + )] + ), + FinalizeSubmission(creator=self.submitter) + ) + + # Now publish. + with self.app.app_context(): + session = classic.current_session() + + # Publication agent publishes the paper. 
+ db_submission = session.query(classic.models.Submission)\ + .get(self.submission.submission_id) + db_submission.status = db_submission.ANNOUNCED + dated = (datetime.now(UTC) - datetime.fromtimestamp(0, UTC)) + primary = self.submission.primary_classification.category + db_submission.document = classic.models.Document( + document_id=1, + paper_id='1901.00123', + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=primary, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_submission.doc_paper_id = '1901.00123' + session.add(db_submission) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_replacement(self): + """User has started a replacement submission.""" + with self.app.app_context(): + submission_to_replace, _ = load(self.submission.submission_id) + creation_event = CreateSubmissionVersion(creator=self.submitter) + replacement, _ = save(creation_event, + submission_id=self.submission.submission_id) + + with self.app.app_context(): + replacement, _ = load(replacement.submission_id) + + session = classic.current_session() + db_replacement = session.query(classic.models.Submission) \ + .filter(classic.models.Submission.doc_paper_id + == replacement.arxiv_id) \ + .order_by(classic.models.Submission.submission_id.desc()) \ + .first() + + # Verify that the round-trip on the replacement submission worked + # as expected. 
+ self.assertEqual(replacement.arxiv_id, + submission_to_replace.arxiv_id) + self.assertEqual(replacement.version, + submission_to_replace.version + 1) + self.assertEqual(replacement.status, Submission.WORKING) + self.assertTrue(submission_to_replace.is_announced) + self.assertFalse(replacement.is_announced) + + self.assertIsNone(replacement.source_content) + + self.assertFalse(replacement.submitter_contact_verified) + self.assertFalse(replacement.submitter_accepts_policy) + self.assertFalse(replacement.submitter_confirmed_preview) + self.assertFalse(replacement.submitter_contact_verified) + + # Verify that the database is in the right state for downstream + # integrations. + self.assertEqual(db_replacement.status, + classic.models.Submission.NEW) + self.assertEqual(db_replacement.type, + classic.models.Submission.REPLACEMENT) + self.assertEqual(db_replacement.doc_paper_id, '1901.00123') + + +class TestJREFIntegration(TestCase): + """Test integration with the classic database with JREF submissions.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + cls.app.config['WAIT_FOR_SERVICES'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """An arXiv user is submitting a new paper.""" + self.submitter = domain.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL']) + + # Create and finalize a new submission. 
+ cc0 = 'http://creativecommons.org/publicdomain/zero/1.0/' + with self.app.app_context(): + classic.create_all() + metadata=dict([ + ('title', 'Foo title'), + ('abstract', "One morning, as Gregor Samsa was..."), + ('comments', '5 pages, 2 turtle doves'), + ('report_num', 'asdf1234') + ]) + self.submission, _ = save( + CreateSubmission(creator=self.submitter), + ConfirmContactInformation(creator=self.submitter), + ConfirmAuthorship( + creator=self.submitter, + submitter_is_author=True + ), + SetLicense( + creator=self.submitter, + license_uri=cc0, + license_name='CC0 1.0' + ), + ConfirmPolicy(creator=self.submitter), + SetPrimaryClassification( + creator=self.submitter, + category='cs.DL' + ), + SetUploadPackage( + creator=self.submitter, + checksum="a9s9k342900skks03330029k", + source_format=domain.submission.SubmissionContent.Format('tex'), + identifier=123, + uncompressed_size=593992, + compressed_size=593992 + ), + SetTitle(creator=self.submitter, + title=metadata['title']), + SetAbstract(creator=self.submitter, + abstract=metadata['abstract']), + SetComments(creator=self.submitter, + comments=metadata['comments']), + SetReportNumber(creator=self.submitter, + report_num=metadata['report_num']), + SetAuthors( + creator=self.submitter, + authors=[Author( + order=0, + forename='Bob', + surname='Paulson', + email='Robert.Paulson@nowhere.edu', + affiliation='Fight Club' + )] + ), + ConfirmPreview(creator=self.submitter), + FinalizeSubmission(creator=self.submitter) + ) + + # Now publish. + with self.app.app_context(): + session = classic.current_session() + + # Publication agent publishes the paper. 
+ db_submission = session.query(classic.models.Submission)\ + .get(self.submission.submission_id) + db_submission.status = db_submission.ANNOUNCED + dated = (datetime.now(UTC) - datetime.fromtimestamp(0, UTC)) + primary = self.submission.primary_classification.category + db_submission.document = classic.models.Document( + document_id=1, + paper_id='1901.00123', + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=primary, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_submission.doc_paper_id = '1901.00123' + session.add(db_submission) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_jref(self): + """User has started a JREF submission.""" + with self.app.app_context(): + session = classic.current_session() + submission_to_jref, _ = load(self.submission.submission_id) + event = SetJournalReference( + creator=self.submitter, + journal_ref='Foo Rev 1, 2 (1903)' + ) + jref_submission, _ = save( + event, + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + jref_submission, _ = load(jref_submission.submission_id) + session = classic.current_session() + db_jref = session.query(classic.models.Submission) \ + .filter(classic.models.Submission.doc_paper_id + == jref_submission.arxiv_id) \ + .filter(classic.models.Submission.type + == classic.models.Submission.JOURNAL_REFERENCE) \ + .order_by(classic.models.Submission.submission_id.desc()) \ + .first() + + # Verify that the round-trip on the JREF submission worked + # as expected. 
class TestWithdrawalIntegration(TestCase):
    """
    Test integration with the classic database concerning withdrawals.

    The :class:`.domain.submission.Submission` representation has only two
    statuses: :attr:`.domain.submission.WITHDRAWAL_REQUESTED` and
    :attr:`.domain.submission.WITHDRAWN`. Like other post-publish operations,
    we are simply adding events to the single stream for the original
    submission ID. This screens off details that are due to the underlying
    implementation, and focuses on how humans are actually interacting with
    withdrawals.

    On the classic side, we create a new row in the submission table for a
    withdrawal request, and it passes through the same states as a regular
    submission.
    """

    @classmethod
    def setUpClass(cls):
        """Instantiate an app for use with a temporary SQLite database."""
        import os

        # mkstemp() returns an already-open OS-level descriptor together with
        # the path. Only the path is needed here, so close the descriptor
        # immediately instead of leaking it for the rest of the test run.
        handle, cls.db_path = tempfile.mkstemp(suffix='.sqlite')
        os.close(handle)

        cls.app = Flask('foo')
        cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{cls.db_path}'
        cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
        cls.app.config['WAIT_FOR_SERVICES'] = False

        with cls.app.app_context():
            classic.init_app(cls.app)

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary SQLite file created by :meth:`setUpClass`."""
        import os

        try:
            os.remove(cls.db_path)
        except FileNotFoundError:
            # Already gone; there is nothing left to clean up.
            pass

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def setUp(self):
        """Create and finalize a submission, then mark it as announced."""
        self.submitter = domain.User(1234, email='j.user@somewhere.edu',
                                     forename='Jane', surname='User',
                                     endorsements=['cs.DL'])

        # Create and finalize a new submission.
        cc0 = 'http://creativecommons.org/publicdomain/zero/1.0/'
        tex = domain.submission.SubmissionContent.Format('tex')
        with self.app.app_context():
            classic.create_all()
            metadata = {
                'title': 'Foo title',
                'abstract': "One morning, as Gregor Samsa was...",
                'comments': '5 pages, 2 turtle doves',
                'report_num': 'asdf1234',
                'doi': '10.01234/56789',
                'journal_ref': 'Foo Rev 1, 2 (1903)',
            }
            self.submission, _ = save(
                CreateSubmission(creator=self.submitter),
                ConfirmContactInformation(creator=self.submitter),
                ConfirmAuthorship(
                    creator=self.submitter,
                    submitter_is_author=True
                ),
                SetLicense(
                    creator=self.submitter,
                    license_uri=cc0,
                    license_name='CC0 1.0'
                ),
                ConfirmPolicy(creator=self.submitter),
                SetPrimaryClassification(
                    creator=self.submitter,
                    category='cs.DL'
                ),
                SetUploadPackage(
                    creator=self.submitter,
                    checksum="a9s9k342900skks03330029k",
                    source_format=tex,
                    identifier=123,
                    uncompressed_size=593992,
                    compressed_size=593992
                ),
                SetTitle(creator=self.submitter, title=metadata['title']),
                SetAbstract(creator=self.submitter,
                            abstract=metadata['abstract']),
                SetComments(creator=self.submitter,
                            comments=metadata['comments']),
                SetJournalReference(
                    creator=self.submitter,
                    journal_ref=metadata['journal_ref']
                ),
                SetDOI(creator=self.submitter, doi=metadata['doi']),
                SetReportNumber(creator=self.submitter,
                                report_num=metadata['report_num']),
                SetAuthors(
                    creator=self.submitter,
                    authors=[Author(
                        order=0,
                        forename='Bob',
                        surname='Paulson',
                        email='Robert.Paulson@nowhere.edu',
                        affiliation='Fight Club'
                    )]
                ),
                FinalizeSubmission(creator=self.submitter)
            )
        self.submission_id = self.submission.submission_id

        # Announce: update the classic row directly and attach a Document,
        # since announcement happens outside of the event model.
        with self.app.app_context():
            session = classic.current_session()
            db_submission = session.query(classic.models.Submission)\
                .get(self.submission.submission_id)
            db_submission.status = db_submission.ANNOUNCED
            # Use one timezone-aware instant for both fields. Subtracting a
            # UTC epoch from a naive *local* now() skews the result by the
            # machine's UTC offset.
            announced_at = datetime.now(UTC)
            primary = self.submission.primary_classification.category
            db_submission.document = classic.models.Document(
                document_id=1,
                paper_id='1901.00123',
                title=self.submission.metadata.title,
                authors=self.submission.metadata.authors_display,
                dated=announced_at.timestamp(),
                primary_subject_class=primary,
                created=announced_at,
                submitter_email=self.submission.creator.email,
                submitter_id=self.submission.creator.native_id
            )
            db_submission.doc_paper_id = '1901.00123'
            session.add(db_submission)
            session.commit()

    def tearDown(self):
        """Clear the database after each test."""
        with self.app.app_context():
            classic.drop_all()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_request_withdrawal(self):
        """
        Request a withdrawal.

        The loaded submission keeps its announced status and carries the
        request in ``user_requests``; the classic database gains a
        withdrawal-type row for the same paper ID.
        """
        with self.app.app_context():
            session = classic.current_session()
            event = RequestWithdrawal(creator=self.submitter,
                                      reason="short people got no reason")
            save(event, submission_id=self.submission_id)

            submission, _ = load(self.submission_id)
            self.assertEqual(submission.status, domain.Submission.ANNOUNCED)
            request = list(submission.user_requests.values())[0]
            self.assertEqual(request.reason_for_withdrawal, event.reason)

            # The most recent classic row for this paper should be the
            # withdrawal request.
            wdr = session.query(classic.models.Submission) \
                .filter(classic.models.Submission.doc_paper_id
                        == submission.arxiv_id) \
                .order_by(classic.models.Submission.submission_id.desc()) \
                .first()
            self.assertEqual(wdr.status,
                             classic.models.Submission.PROCESSING_SUBMISSION)
            self.assertEqual(wdr.type, classic.models.Submission.WITHDRAWAL)
            self.assertIn(f"Withdrawn: {event.reason}", wdr.comments)
@mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
def test_publication_status_is_reflected(self):
    """The submission has been announced."""
    with self.app.app_context():
        session = classic.current_session()

        # Publication agent publishes the paper: the classic row is marked
        # as announced and a Document carrying the paper ID is attached.
        db_submission = session.query(classic.models.Submission)\
            .get(self.submission.submission_id)
        db_submission.status = db_submission.ANNOUNCED
        # Use one timezone-aware instant for both fields. Subtracting a UTC
        # epoch from a naive *local* now() skews the result by the machine's
        # UTC offset.
        announced_at = datetime.now(UTC)
        primary = self.submission.primary_classification.category
        db_submission.document = classic.models.Document(
            document_id=1,
            paper_id='1901.00123',
            title=self.submission.metadata.title,
            authors=self.submission.metadata.authors_display,
            dated=announced_at.timestamp(),
            primary_subject_class=primary,
            created=announced_at,
            submitter_email=self.submission.creator.email,
            submitter_id=self.submission.creator.native_id
        )
        session.add(db_submission)
        session.commit()

        # Submission state should reflect publication status.
        submission, _ = load(self.submission.submission_id)
        self.assertEqual(submission.status, submission.ANNOUNCED,
                         "Submission should have announced status.")
        self.assertEqual(submission.arxiv_id, "1901.00123",
                         "arXiv paper ID should be set")
        self.assertFalse(submission.is_active,
                         "Announced submission should no longer be active")
@mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
def test_scheduled_status_is_reflected(self):
    """A classic ``PROCESSING`` row is loaded as a scheduled submission."""
    submission_id = self.submission.submission_id
    with self.app.app_context():
        db = classic.current_session()

        # Put the classic row into the PROCESSING state directly.
        row = db.query(classic.models.Submission).get(submission_id)
        row.status = row.PROCESSING
        db.add(row)
        db.commit()

        # The domain object should report the scheduled status.
        loaded, _ = load(submission_id)
        self.assertEqual(loaded.status, loaded.SCHEDULED,
                         "Submission should have scheduled status.")
@mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
def test_publication_failed(self):
    """A classic ``ERROR_STATE`` row is loaded with the error status."""
    submission_id = self.submission.submission_id
    with self.app.app_context():
        db = classic.current_session()

        # Put the classic row into the error state directly.
        row = db.query(classic.models.Submission).get(submission_id)
        row.status = row.ERROR_STATE
        db.add(row)
        db.commit()

        # The domain object should report the error status.
        loaded, _ = load(submission_id)
        self.assertEqual(loaded.status, loaded.ERROR,
                         "Submission should have error status.")
class TestWorkingSubmission(TestCase):
    """
    Submitter creates a new submission, has completed some but not all fields.

    This is a typical scenario in which the user has missed a step, or left
    something required blank. These should get caught early if we designed
    the UI or API right, but it's possible that something slipped through.
    """

    @classmethod
    def setUpClass(cls):
        """Instantiate an app for use with a temporary SQLite database."""
        import os

        # mkstemp() returns an already-open OS-level descriptor together with
        # the path. Only the path is needed here, so close the descriptor
        # immediately instead of leaking it for the rest of the test run.
        handle, cls.db_path = tempfile.mkstemp(suffix='.sqlite')
        os.close(handle)

        cls.app = Flask('foo')
        cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{cls.db_path}'
        cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

        with cls.app.app_context():
            classic.init_app(cls.app)

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary SQLite file created by :meth:`setUpClass`."""
        import os

        try:
            os.remove(cls.db_path)
        except FileNotFoundError:
            # Already gone; there is nothing left to clean up.
            pass

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def setUp(self):
        """Create and partially complete the submission."""
        self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu',
                                           forename='Jane', surname='User',
                                           endorsements=['cs.DL', 'cs.IR'])
        self.defaults = {'creator': self.submitter}
        with self.app.app_context():
            classic.create_all()
            self.submission, self.events = save(
                domain.event.CreateSubmission(**self.defaults),
                domain.event.ConfirmAuthorship(**self.defaults),
                domain.event.ConfirmPolicy(**self.defaults),
                domain.event.SetTitle(title='the best title', **self.defaults)
            )
        self.submission_id = self.submission.submission_id

    def tearDown(self):
        """Clear the database after each test."""
        with self.app.app_context():
            classic.drop_all()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_is_in_working_state(self):
        """
        The submission is in the working state.

        Also called by the ``test_cannot_*`` methods of this class to confirm
        that a rejected command left the submission untouched.
        """
        # Check the submission state via the full event replay.
        with self.app.app_context():
            submission, events = load(self.submission.submission_id)
            self.assertEqual(submission.status,
                             domain.submission.Submission.WORKING,
                             "The submission is in the working state")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the submission state via the fast loader.
        with self.app.app_context():
            submission = load_fast(self.submission_id)
            self.assertEqual(submission.status,
                             domain.submission.Submission.WORKING,
                             "The submission is in the working state")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the database state.
        with self.app.app_context():
            session = classic.current_session()
            db_rows = session.query(classic.models.Submission).all()

            self.assertEqual(len(db_rows), 1,
                             "There is one row in the submission table")
            row = db_rows[0]
            self.assertEqual(row.type,
                             classic.models.Submission.NEW_SUBMISSION,
                             "The classic submission has type 'new'")
            self.assertEqual(row.status,
                             classic.models.Submission.NOT_SUBMITTED,
                             "The classic submission is in the not submitted"
                             " state")

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_can_delete(self):
        """The submission can be deleted."""
        with self.app.app_context():
            save(domain.event.Rollback(**self.defaults),
                 submission_id=self.submission.submission_id)

        with self.app.app_context():
            submission, events = load(self.submission.submission_id)

            self.assertEqual(submission.status,
                             domain.submission.Submission.DELETED,
                             "Submission is in the deleted state")
            self.assertFalse(submission.is_active,
                             "The submission is no longer considered active.")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        with self.app.app_context():
            submission = load_fast(self.submission_id)
            self.assertEqual(submission.status,
                             domain.submission.Submission.DELETED,
                             "Submission is in the deleted state")
            self.assertFalse(submission.is_active,
                             "The submission is no longer considered active.")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        with self.app.app_context():
            session = classic.current_session()
            db_rows = session.query(classic.models.Submission).all()

            self.assertEqual(len(db_rows), 1,
                             "There is one row in the submission table")
            row = db_rows[0]
            self.assertEqual(row.type,
                             classic.models.Submission.NEW_SUBMISSION,
                             "The classic submission has type 'new'")
            self.assertEqual(row.status,
                             classic.models.Submission.USER_DELETED,
                             "The classic submission is in the DELETED state")

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_finalize_submission(self):
        """The submission cannot be finalized."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a FinalizeSubmission command results in an"
                    " exception.")):
                save(domain.event.FinalizeSubmission(**self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_working_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_replace_submission(self):
        """The submission cannot be replaced."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a CreateSubmissionVersion command results in an"
                    " exception.")):
                save(domain.event.CreateSubmissionVersion(**self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_working_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_withdraw_submission(self):
        """The submission cannot be withdrawn."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a RequestWithdrawal command results in an"
                    " exception.")):
                save(domain.event.RequestWithdrawal(reason="the best reason",
                                                    **self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_working_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_be_unfinalized(self):
        """The submission cannot be unfinalized."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating an UnFinalizeSubmission command results in an"
                    " exception.")):
                save(domain.event.UnFinalizeSubmission(**self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_working_state()
@classmethod
def setUpClass(cls):
    """Instantiate an app for use with a temporary SQLite database."""
    import os

    # mkstemp() returns an already-open OS-level descriptor together with the
    # path. Only the path is needed here, so close the descriptor immediately
    # instead of leaking it for the rest of the test run.
    handle, db = tempfile.mkstemp(suffix='.sqlite')
    os.close(handle)

    cls.app = Flask('foo')
    cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}'
    cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

    with cls.app.app_context():
        classic.init_app(cls.app)
@mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
def test_is_in_submitted_state(self):
    """
    Verify that the submission has reached the submitted state.

    Checks the full loader, the fast loader, and the single classic database
    row in turn. Sibling tests call this to confirm that a rejected command
    changed nothing.
    """
    submission_id = self.submission.submission_id
    expected = domain.submission.Submission.SUBMITTED

    # State as reconstructed by the full loader.
    with self.app.app_context():
        loaded, _ = load(submission_id)
        self.assertEqual(loaded.status, expected,
                         "The submission is in the submitted state")
        self.assertEqual(len(loaded.versions), 0,
                         "There are no announced versions")

    # State as reported by the fast loader.
    with self.app.app_context():
        fast = load_fast(submission_id)
        self.assertEqual(fast.status, expected,
                         "The submission is in the submitted state")
        self.assertEqual(len(fast.versions), 0,
                         "There are no announced versions")

    # State of the classic database.
    with self.app.app_context():
        rows = classic.current_session() \
            .query(classic.models.Submission) \
            .all()

        self.assertEqual(len(rows), 1,
                         "There is one row in the submission table")
        only_row = rows[0]
        self.assertEqual(only_row.type,
                         classic.models.Submission.NEW_SUBMISSION,
                         "The classic submission has type 'new'")
        self.assertEqual(only_row.status,
                         classic.models.Submission.SUBMITTED,
                         "The classic submission is in the SUBMITTED"
                         " state")
@mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
def test_cannot_edit_submission(self):
    """Metadata commands are rejected and leave the submission as it was."""
    submission_id = self.submission.submission_id

    # Each entry pairs a deferred event constructor with the failure message
    # to report if saving it does NOT raise. Construction is deferred so that
    # it happens inside the assertRaises context.
    rejected = (
        (lambda: domain.event.SetTitle(title="A better title",
                                       **self.defaults),
         "Creating a SetTitle command results in an exception."),
        (lambda: domain.event.SetDOI(doi="10.1000/182", **self.defaults),
         "Creating a SetDOI command results in an exception."),
    )

    with self.app.app_context():
        for build_event, message in rejected:
            with self.assertRaises(exceptions.InvalidEvent, msg=message):
                save(build_event(), submission_id=submission_id)

    self.test_is_in_submitted_state()
class TestOnHoldSubmission(TestCase):
    """
    Submitter finalizes a new submission; the system places it on hold.

    This can be due to a variety of reasons.
    """

    @classmethod
    def setUpClass(cls):
        """Instantiate an app for use with a temporary SQLite database."""
        import os

        # mkstemp() returns an already-open OS-level descriptor together with
        # the path. Only the path is needed here, so close the descriptor
        # immediately instead of leaking it for the rest of the test run.
        handle, cls.db_path = tempfile.mkstemp(suffix='.sqlite')
        os.close(handle)

        cls.app = Flask('foo')
        cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{cls.db_path}'
        cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

        with cls.app.app_context():
            classic.init_app(cls.app)

    @classmethod
    def tearDownClass(cls):
        """Delete the temporary SQLite file created by :meth:`setUpClass`."""
        import os

        try:
            os.remove(cls.db_path)
        except FileNotFoundError:
            # Already gone; there is nothing left to clean up.
            pass

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def setUp(self):
        """Create and finalize the submission, then place it on hold."""
        self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu',
                                           forename='Jane', surname='User',
                                           endorsements=['cs.DL', 'cs.IR'])
        self.defaults = {'creator': self.submitter}
        tex = domain.submission.SubmissionContent.Format('tex')
        with self.app.app_context():
            classic.create_all()
            self.title = "the best title"
            self.doi = "10.01234/56789"
            self.submission, self.events = save(
                domain.event.CreateSubmission(**self.defaults),
                domain.event.ConfirmContactInformation(**self.defaults),
                domain.event.ConfirmAuthorship(**self.defaults),
                domain.event.ConfirmPolicy(**self.defaults),
                domain.event.SetTitle(title=self.title, **self.defaults),
                domain.event.SetLicense(license_uri=CCO,
                                        license_name="CC0 1.0",
                                        **self.defaults),
                domain.event.SetPrimaryClassification(category="cs.DL",
                                                      **self.defaults),
                domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029",
                                              source_format=tex,
                                              identifier=123,
                                              uncompressed_size=593992,
                                              compressed_size=593992,
                                              **self.defaults),
                domain.event.SetAbstract(abstract="Very abstract " * 20,
                                         **self.defaults),
                domain.event.SetComments(comments="Fine indeed " * 10,
                                         **self.defaults),
                domain.event.SetJournalReference(journal_ref="Foo 1992",
                                                 **self.defaults),
                domain.event.SetDOI(doi=self.doi, **self.defaults),
                domain.event.SetAuthors(authors_display='Robert Paulson (FC)',
                                        **self.defaults),
                domain.event.FinalizeSubmission(**self.defaults)
            )

        # Place the submission on hold by updating the classic row directly;
        # no hold event is issued through save().
        with self.app.app_context():
            session = classic.current_session()
            db_row = session.query(classic.models.Submission).first()
            db_row.status = classic.models.Submission.ON_HOLD
            session.add(db_row)
            session.commit()

    def tearDown(self):
        """Clear the database after each test."""
        with self.app.app_context():
            classic.drop_all()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_is_in_submitted_state(self):
        """
        The submission is now on hold.

        Despite its name, this method checks the on-hold flag. It is also
        called by the ``test_cannot_*`` methods of this class to confirm that
        a rejected command left the submission untouched.
        """
        # NOTE(review): only the ``is_on_hold`` flag is asserted for the
        # loaded submission; an equality check of ``status`` against
        # ``Submission.ON_HOLD`` was previously disabled here. Confirm whether
        # it should be restored.

        # Check the submission state via the full event replay.
        with self.app.app_context():
            submission, events = load(self.submission.submission_id)
            self.assertTrue(submission.is_on_hold, "The submission is on hold")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the submission state via the fast loader.
        with self.app.app_context():
            submission = load_fast(self.submission.submission_id)
            self.assertTrue(submission.is_on_hold, "The submission is on hold")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the database state.
        with self.app.app_context():
            session = classic.current_session()
            db_rows = session.query(classic.models.Submission).all()

            self.assertEqual(len(db_rows), 1,
                             "There is one row in the submission table")
            row = db_rows[0]
            self.assertEqual(row.type,
                             classic.models.Submission.NEW_SUBMISSION,
                             "The classic submission has type 'new'")
            self.assertEqual(row.status,
                             classic.models.Submission.ON_HOLD,
                             "The classic submission is in the ON_HOLD"
                             " state")

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_replace_submission(self):
        """The submission cannot be replaced: it hasn't yet been announced."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a CreateSubmissionVersion command results in an"
                    " exception.")):
                save(domain.event.CreateSubmissionVersion(**self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_submitted_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_withdraw_submission(self):
        """The submission cannot be withdrawn: it hasn't yet been announced."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a RequestWithdrawal command results in an"
                    " exception.")):
                save(domain.event.RequestWithdrawal(reason="the best reason",
                                                    **self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_submitted_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_cannot_edit_submission(self):
        """The submission cannot be changed: it hasn't yet been announced."""
        with self.app.app_context():
            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a SetTitle command results in an exception.")):
                save(domain.event.SetTitle(title="A better title",
                                           **self.defaults),
                     submission_id=self.submission.submission_id)

            with self.assertRaises(exceptions.InvalidEvent, msg=(
                    "Creating a SetDOI command results in an exception.")):
                save(domain.event.SetDOI(doi="10.1000/182", **self.defaults),
                     submission_id=self.submission.submission_id)

        self.test_is_in_submitted_state()

    @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock())
    def test_can_be_unfinalized(self):
        """The submission can be unfinalized; the hold stays sticky."""
        with self.app.app_context():
            save(domain.event.UnFinalizeSubmission(**self.defaults),
                 submission_id=self.submission.submission_id)

        # Check the submission state via the full event replay.
        with self.app.app_context():
            submission, events = load(self.submission.submission_id)
            self.assertEqual(submission.status,
                             domain.submission.Submission.WORKING,
                             "The submission is in the working state")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the submission state via the fast loader.
        with self.app.app_context():
            submission = load_fast(self.submission.submission_id)
            self.assertEqual(submission.status,
                             domain.submission.Submission.WORKING,
                             "The submission is in the working state")
            self.assertEqual(len(submission.versions), 0,
                             "There are no announced versions")

        # Check the database state.
        with self.app.app_context():
            session = classic.current_session()
            db_rows = session.query(classic.models.Submission).all()

            self.assertEqual(len(db_rows), 1,
                             "There is one row in the submission table")
            row = db_rows[0]
            self.assertEqual(row.type,
                             classic.models.Submission.NEW_SUBMISSION,
                             "The classic submission has type 'new'")
            self.assertEqual(row.status,
                             classic.models.Submission.NOT_SUBMITTED,
                             "The classic submission is in the not submitted"
                             " state")
            self.assertEqual(row.sticky_status,
                             classic.models.Submission.ON_HOLD,
                             "The hold is preserved as a sticky status")
import save, load, load_fast, domain, exceptions, core + +CCO = 'http://creativecommons.org/publicdomain/zero/1.0/' + + +class TestAnnouncedSubmission(TestCase): + """Submitter finalizes a new submission, and it is eventually announced.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=domain.submission.SubmissionContent.Format('tex'), identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), 
+ domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. + self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_is_in_announced_state(self): + """The submission is now announced.""" + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state") + self.assertTrue(submission.is_announced, "Submission is announced") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state") + self.assertTrue(submission.is_announced, "Submission is announced") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission).all() + + self.assertEqual(len(db_rows), 1, + "There is one row in the submission table") + row = db_rows[0] + self.assertEqual(row.type, + classic.models.Submission.NEW_SUBMISSION, + "The classic submission has type 'new'") + self.assertEqual(row.status, + classic.models.Submission.ANNOUNCED, + "The classic submission is in the ANNOUNCED" + " state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_replace_submission(self): + """The submission can be replaced, resulting in a new version.""" + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.NOT_SUBMITTED, + "The second row is in not submitted state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_withdraw_submission(self): + """The submitter can request withdrawal of the submission.""" + withdrawal_reason = "the best reason" + with self.app.app_context(): + submission, events = save( + domain.event.RequestWithdrawal(reason=withdrawal_reason, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.WITHDRAWAL, + "The second row has type 'withdrawal'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The second row is in the processing submission" + " state.") + + # Cannot submit another withdrawal request while one is pending. + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestWithdrawal(reason="more reason", + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_request_crosslist(self): + """The submitter can request cross-list classification.""" + category = "cs.IR" + with self.app.app_context(): + submission, events = save( + domain.event.RequestCrossList(categories=[category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The second row is in the processing submission" + " state.") + + # Cannot submit another cross-list request while one is pending. + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestCrossList(categories=["q-fin.CP"], + **self.defaults), + submission_id=self.submission.submission_id) + + # Cannot submit a withdrawal request while a cross-list is pending. 
+ with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestWithdrawal(reason="more reason", + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_edit_submission_metadata(self): + """The submission metadata cannot be changed without a new version.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent, msg=( + "Creating a SetTitle command results in an exception.")): + save(domain.event.SetTitle(title="A better title", + **self.defaults), + submission_id=self.submission.submission_id) + + self.test_is_in_announced_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_changing_doi(self): + """Submitter can set the DOI.""" + new_doi = "10.1000/182" + new_journal_ref = "Baz 1993" + new_report_num = "Report 82" + with self.app.app_context(): + submission, events = save( + domain.event.SetDOI(doi=new_doi, **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.SetJournalReference(journal_ref=new_journal_ref, + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.SetReportNumber(report_num=new_report_num, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state.") + + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.JOURNAL_REFERENCE, + "The second row has type journal ref") + self.assertEqual(db_rows[1].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The second row is in the processing submission" + " state.") + self.assertEqual(db_rows[1].doi, new_doi, + "The DOI is updated in the database.") + self.assertEqual(db_rows[1].journal_ref, new_journal_ref, + "The journal ref is updated in the database.") + self.assertEqual(db_rows[1].report_num, new_report_num, + "The report number is updated in the database.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_be_unfinalized(self): + """The submission cannot be unfinalized, because it is announced.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.UnFinalizeSubmission(**self.defaults), + submission_id=self.submission.submission_id) + + self.test_is_in_announced_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_rolling_back_does_not_clobber_jref_changes(self): + """If user submits a JREF, rolling back does not clobber changes.""" + # These changes result in what we consider a "JREF submission" in + # classic. But we're moving away from that way of thinking in NG, so + # it should be somewhat opaque in a replacement/deletion scenario. 
+ new_doi = "10.1000/182" + new_journal_ref = "Baz 1993" + new_report_num = "Report 82" + with self.app.app_context(): + submission, events = save( + domain.event.SetDOI(doi=new_doi, **self.defaults), + domain.event.SetJournalReference(journal_ref=new_journal_ref, + **self.defaults), + domain.event.SetReportNumber(report_num=new_report_num, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Now we get a replacement. + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + domain.event.SetTitle(title='A new and better title', + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Now the user rolls back the replacement. + with self.app.app_context(): + submission, events = save( + domain.event.Rollback(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. The JREF changes shoulds stick. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is still updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is still updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is stil updated.") + self.assertEqual(submission.metadata.title, + self.submission.metadata.title, + "The title is reverted to the last announced" + " version.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is still updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is still updated.") + 
self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is stil updated.") + self.assertEqual(submission.metadata.title, + self.submission.metadata.title, + "The title is reverted to the last announced" + " version.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the submitted state.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") diff --git a/core/arxiv/submission/tests/examples/test_05_working_replacement.py b/core/arxiv/submission/tests/examples/test_05_working_replacement.py new file mode 100644 index 0000000..9f33d31 --- /dev/null +++ b/core/arxiv/submission/tests/examples/test_05_working_replacement.py @@ -0,0 +1,465 @@ +"""Example 5: submission is being replaced.""" + +from unittest import TestCase, mock +import tempfile +from datetime import datetime +from pytz import UTC + +from flask import Flask + +from ...services import classic +from ... import save, load, load_fast, domain, exceptions, core + +CCO = 'http://creativecommons.org/publicdomain/zero/1.0/' + + +class TestReplacementSubmissionInProgress(TestCase): + """Submitter creates a replacement, and is working on updates.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" 
+ self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=domain.submission.SubmissionContent.Format('tex'), identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. 
+ self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + with self.app.app_context(): + self.submission, self.events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_is_in_working_state(self): + """The submission is now in working state.""" + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertIsInstance(self.events[-2], domain.event.Announce, + "An Announce event is inserted.") + self.assertIsInstance(self.events[-1], + domain.event.CreateSubmissionVersion, + "A CreateSubmissionVersion event is" + " inserted.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertIsInstance(self.events[-2], domain.event.Announce, + "An Announce event is inserted.") + self.assertIsInstance(self.events[-1], + domain.event.CreateSubmissionVersion, + "A CreateSubmissionVersion event is" + " inserted.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.NOT_SUBMITTED, + "The second row is in not submitted state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_replace_submission_again(self): + """The submission cannot be replaced again while in working state.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + self.submission, self.events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + self.test_is_in_working_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_withdraw_submission(self): + """The submitter cannot request withdrawal of the submission.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + submission, events = save( + domain.event.RequestWithdrawal(reason="the best reason", + **self.defaults), + submission_id=self.submission.submission_id + ) + + self.test_is_in_working_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_edit_submission_metadata(self): + """The submission metadata can now be changed.""" + new_title = "A better title" + with self.app.app_context(): + submission, events = save( + domain.event.SetTitle(title=new_title, **self.defaults), + 
submission_id=self.submission.submission_id + ) + + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.metadata.title, new_title, + "The submission is changed") + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertIsInstance(events[-3], domain.event.Announce, + "An Announce event is inserted.") + self.assertIsInstance(events[-2], + domain.event.CreateSubmissionVersion, + "A CreateSubmissionVersion event is" + " inserted.") + self.assertIsInstance(events[-1], + domain.event.SetTitle, + "Metadata update events are reflected") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.metadata.title, new_title, + "The submission is changed") + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertIsInstance(events[-3], domain.event.Announce, + "An Announce event is inserted.") + self.assertIsInstance(events[-2], + domain.event.CreateSubmissionVersion, + "A CreateSubmissionVersion event is" + " inserted.") + self.assertIsInstance(events[-1], + domain.event.SetTitle, + "Metadata update events are reflected") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[0].title, self.submission.metadata.title, + "Announced row is unchanged.") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.NOT_SUBMITTED, + "The second row is in not submitted state") + self.assertEqual(db_rows[1].title, new_title, + "Replacement row reflects the change.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_changing_doi(self): + """Submitter can set the DOI as part of the new version.""" + new_doi = "10.1000/182" + new_journal_ref = "Baz 1993" + new_report_num = "Report 82" + with self.app.app_context(): + submission, events = save( + domain.event.SetDOI(doi=new_doi, **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.SetJournalReference(journal_ref=new_journal_ref, + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.SetReportNumber(report_num=new_report_num, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state.") + + self.assertIsInstance(events[-5], domain.event.Announce, + "An Announce event is inserted.") + self.assertIsInstance(events[-4], + domain.event.CreateSubmissionVersion, + "A CreateSubmissionVersion event is" + " inserted.") + self.assertIsInstance(events[-3], + domain.event.SetDOI, + "Metadata update events are reflected") + self.assertIsInstance(events[-2], + domain.event.SetJournalReference, + "Metadata update events are reflected") + self.assertIsInstance(events[-1], + domain.event.SetReportNumber, + "Metadata update events are reflected") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type replacement") + self.assertEqual(db_rows[1].status, + classic.models.Submission.NOT_SUBMITTED, + "The second row is in the not submitted state.") + self.assertEqual(db_rows[1].doi, new_doi, + "The DOI is updated in the database.") + self.assertEqual(db_rows[1].journal_ref, new_journal_ref, + "The journal ref is updated in the database.") + self.assertEqual(db_rows[1].report_num, new_report_num, + "The report number is updated in the database.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_be_unfinalized(self): + """The submission cannot be unfinalized, as it is not finalized.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.UnFinalizeSubmission(**self.defaults), + submission_id=self.submission.submission_id) + + self.test_is_in_working_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_revert_to_most_recent_announced_version(self): + """Submitter can abandon changes to their replacement.""" + new_doi = "10.1000/182" + new_journal_ref = "Baz 1993" + new_report_num = "Report 82" + with self.app.app_context(): + submission, events = save( + domain.event.SetDOI(doi=new_doi, **self.defaults), + domain.event.SetJournalReference(journal_ref=new_journal_ref, + **self.defaults), + domain.event.SetReportNumber(report_num=new_report_num, + 
**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.Rollback(**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.version, 1, + "Version number is rolled back") + self.assertEqual(submission.metadata.doi, + self.submission.metadata.doi, + "The DOI is reverted.") + self.assertEqual(submission.metadata.journal_ref, + self.submission.metadata.journal_ref, + "The journal ref is reverted.") + self.assertEqual(submission.metadata.report_num, + self.submission.metadata.report_num, + "The report number is reverted.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.version, 1, + "Version number is rolled back") + self.assertEqual(submission.metadata.doi, + self.submission.metadata.doi, + "The DOI is reverted.") + self.assertEqual(submission.metadata.journal_ref, + self.submission.metadata.journal_ref, + "The journal ref is reverted.") + self.assertEqual(submission.metadata.report_num, + self.submission.metadata.report_num, + "The report number is reverted.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type replacement") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is in the user deleted state.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_start_a_new_replacement_after_reverting(self): + """Submitter can start a new replacement after reverting.""" + with self.app.app_context(): + submission, events = save( + domain.event.Rollback(**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.version, 2, + "Version number is incremented.") + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "Submission is in working state") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") diff --git a/core/arxiv/submission/tests/examples/test_06_second_version_published.py b/core/arxiv/submission/tests/examples/test_06_second_version_published.py new file mode 100644 index 0000000..62555f9 --- /dev/null +++ b/core/arxiv/submission/tests/examples/test_06_second_version_published.py @@ -0,0 +1,420 @@ +"""Example 6: second version of a submission 
is announced.""" + +from unittest import TestCase, mock +import tempfile +from datetime import datetime +from pytz import UTC + +from flask import Flask + +from ...services import classic +from ... import save, load, load_fast, domain, exceptions, core + +CCO = 'http://creativecommons.org/publicdomain/zero/1.0/' + + +class TestSecondVersionIsAnnounced(TestCase): + """Submitter creates a replacement, and it is announced.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create and publish two versions.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.drop_all() + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=domain.submission.SubmissionContent.Format('tex'), identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + 
domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. + self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + with self.app.app_context(): + new_title = "A better title" + self.submission, self.events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=new_title, **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=domain.submission.SubmissionContent.Format('tex'), identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Announce second version. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + db_rows[1].status = classic.models.Submission.ANNOUNCED + session.add(db_rows[1]) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_is_in_announced_state(self): + """The submission is now in announced state.""" + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the announced state") + self.assertIsInstance(events[-1], domain.event.Announce, + "An Announce event is inserted.") + p_evts = [e for e in events if isinstance(e, domain.event.Announce)] + self.assertEqual(len(p_evts), 2, "There are two publish events.") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the announced state") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in announced state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_replace_submission(self): + """The submission can be replaced, resulting in a new version.""" + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 3, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 3, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.REPLACEMENT, + "The third row has type 'replacement'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.NOT_SUBMITTED, + "The third row is in not submitted state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_withdraw_submission(self): + """The submitter can request withdrawal of the submission.""" + withdrawal_reason = "the best reason" + with self.app.app_context(): + submission, events = save( + domain.event.RequestWithdrawal(reason=withdrawal_reason, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.WITHDRAWAL, + "The third row has type 'withdrawal'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_edit_submission_metadata(self): + """The submission metadata cannot be changed without a new version.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent, msg=( + "Creating a SetTitle command results in an exception.")): + save(domain.event.SetTitle(title="A better title", + **self.defaults), + submission_id=self.submission.submission_id) + + self.test_is_in_announced_state() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_changing_doi(self): + """Submitter can set the DOI.""" + new_doi = "10.1000/182" + new_journal_ref = "Baz 1993" + new_report_num = "Report 82" + with self.app.app_context(): + submission, events = save( + domain.event.SetDOI(doi=new_doi, **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + 
domain.event.SetJournalReference(journal_ref=new_journal_ref, + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = save( + domain.event.SetReportNumber(report_num=new_report_num, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the announced state.") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.metadata.doi, new_doi, + "The DOI is updated.") + self.assertEqual(submission.metadata.journal_ref, new_journal_ref, + "The journal ref is updated.") + self.assertEqual(submission.metadata.report_num, new_report_num, + "The report number is updated.") + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is in the announced state.") + self.assertEqual(len(submission.versions), 2, + "There are two announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.JOURNAL_REFERENCE, + "The third row has type journal ref") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") + self.assertEqual(db_rows[2].doi, new_doi, + "The DOI is updated in the database.") + self.assertEqual(db_rows[2].journal_ref, new_journal_ref, + "The journal ref is updated in the database.") + self.assertEqual(db_rows[2].report_num, new_report_num, + "The report number is updated in the database.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_be_unfinalized(self): + """The submission cannot be unfinalized, because it is announced.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.UnFinalizeSubmission(**self.defaults), + submission_id=self.submission.submission_id) + + self.test_is_in_announced_state() diff --git a/core/arxiv/submission/tests/examples/test_07_cross_list_requested.py b/core/arxiv/submission/tests/examples/test_07_cross_list_requested.py new file mode 100644 index 0000000..5d416a6 --- /dev/null +++ 
b/core/arxiv/submission/tests/examples/test_07_cross_list_requested.py @@ -0,0 +1,1086 @@ +"""Example 7: cross-list request.""" + +from unittest import TestCase, mock +import tempfile +from datetime import datetime +from pytz import UTC + +from flask import Flask + +from ...services import classic +from ... import save, load, load_fast, domain, exceptions, core + +CCO = 'http://creativecommons.org/publicdomain/zero/1.0/' +TEX = domain.submission.SubmissionContent.Format('tex') + + +class TestCrossListRequested(TestCase): + """Submitter has requested that a cross-list classification be added.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, + 
identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. + self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + # Request cross-list classification + self.category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestCrossList(categories=[self.category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_has_pending_requests(self): + """The submission has a pending cross-list request.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + 
domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The second row is in the processing submission" + " state.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_replace_submission(self): + """The submission cannot be replaced.""" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_withdraw_submission(self): + """The submitter cannot request withdrawal.""" + withdrawal_reason = "the best reason" + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestWithdrawal(reason=withdrawal_reason, + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_cannot_request_another_crosslist(self): + """The submitter cannot request a second cross-list.""" + # Cannot submit another cross-list request while one is pending. 
+ with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestCrossList(categories=["q-fin.CP"], + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_request_is_rejected(self): + """If the request is 'removed' in classic, NG request is rejected.""" + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + db_rows[1].status = classic.models.Submission.REMOVED + session.add(db_rows[1]) + session.commit() + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There are no pending user request.") + self.assertEqual(len(submission.rejected_user_requests), 1, + "There is one rejected user request.") + self.assertIsInstance( + submission.rejected_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.rejected_user_requests[0].categories, + "Requested category is set on request.") + self.assertNotIn(self.category, submission.secondary_categories, + "Requested category is not added to submission") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There are no pending user request.") + 
self.assertEqual(len(submission.rejected_user_requests), 1, + "There is one rejected user request.") + self.assertIsInstance( + submission.rejected_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.rejected_user_requests[0].categories, + "Requested category is set on request.") + self.assertNotIn(self.category, submission.secondary_categories, + "Requested category is not added to submission") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_request_is_applied(self): + """If the request is announced in classic, NG request is 'applied'.""" + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + db_rows[1].status = classic.models.Submission.ANNOUNCED + session.add(db_rows[1]) + session.commit() + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There are no pending user request.") + self.assertEqual(len(submission.applied_user_requests), 1, + "There is one applied user request.") + self.assertIsInstance( + submission.applied_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.applied_user_requests[0].categories, + "Requested category is set on request.") + self.assertIn(self.category, submission.secondary_categories, + "Requested category is added to submission") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is 
announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There are no pending user request.") + self.assertEqual(len(submission.applied_user_requests), 1, + "There is one applied user request.") + self.assertIsInstance( + submission.applied_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.applied_user_requests[0].categories, + "Requested category is set on request.") + self.assertIn(self.category, submission.secondary_categories, + "Requested category is added to submission") + + +class TestCrossListApplied(TestCase): + """Request for cross-list has been approved and applied.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + 
domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. + self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + # Request cross-list classification + self.category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestCrossList(categories=[self.category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Apply. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + db_rows[1].status = classic.models.Submission.ANNOUNCED + session.add(db_rows[1]) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_has_applied_requests(self): + """The submission has an applied request.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.applied_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.applied_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.applied_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.applied_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.applied_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.applied_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There 
is one announced versions") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in the processing submission" + " state.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_replace_submission(self): + """The submission can be replaced, resulting in a new version.""" + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in the announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.REPLACEMENT, + "The third row has type 'replacement'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.NOT_SUBMITTED, + "The third row is in not submitted state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_withdraw_submission(self): + """The submitter can request withdrawal of the submission.""" + withdrawal_reason = "the best reason" + with self.app.app_context(): + submission, events = save( + domain.event.RequestWithdrawal(reason=withdrawal_reason, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in the announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.WITHDRAWAL, + "The third row has type 'withdrawal'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") + + # Cannot submit another withdrawal request while one is pending. + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestWithdrawal(reason="more reason", + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_request_crosslist(self): + """The submitter can request cross-list classification.""" + category = "cs.LO" + with self.app.app_context(): + submission, events = save( + domain.event.RequestCrossList(categories=[category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.ANNOUNCED, + "The second row is in the announced state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.CROSS_LIST, + "The third row has type 'cross'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") + + +class TestCrossListRejected(TestCase): + """Request for cross-list has been rejected.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + 
domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, + identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. 
+ self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + # Request cross-list classification + self.category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestCrossList(categories=[self.category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Reject the request.
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + db_rows[1].status = classic.models.Submission.REMOVED + session.add(db_rows[1]) + session.commit() + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_has_rejected_request(self): + """The submission has a rejected request.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There is are no pending user requests.") + self.assertEqual(len(submission.rejected_user_requests), 1, + "There is one rejected user request.") + self.assertIsInstance( + submission.rejected_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.rejected_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertFalse(submission.has_active_requests, + "The submission has no active requests.") + self.assertEqual(len(submission.pending_user_requests), 0, + "There is are no pending user requests.") + self.assertEqual(len(submission.rejected_user_requests), 1, + "There is one rejected user request.") + self.assertIsInstance( + submission.rejected_user_requests[0], + 
domain.submission.CrossListClassificationRequest + ) + self.assertIn(self.category, + submission.rejected_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.REMOVED, + "The second row is in the removed state.") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_replace_submission(self): + """The submission can be replaced, resulting in a new version.""" + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is in the working state") + self.assertEqual(submission.version, 2, + "The version number is incremented by 1") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.REMOVED, + "The second row is in the removed state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.REPLACEMENT, + "The third row has type 'replacement'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.NOT_SUBMITTED, + "The third row is in not submitted state") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_withdraw_submission(self): + """The submitter can request withdrawal of the submission.""" + withdrawal_reason = "the best reason" + 
with self.app.app_context(): + submission, events = save( + domain.event.RequestWithdrawal(reason=withdrawal_reason, + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance(submission.pending_user_requests[0], + domain.submission.WithdrawalRequest) + self.assertEqual( + submission.pending_user_requests[0].reason_for_withdrawal, + withdrawal_reason, + "Withdrawal reason is set on request." + ) + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.REMOVED, + "The second row is in the removed state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.WITHDRAWAL, + "The third row has type 'withdrawal'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") + + # Cannot submit another withdrawal request while one is pending. + with self.app.app_context(): + with self.assertRaises(exceptions.InvalidEvent): + save(domain.event.RequestWithdrawal(reason="more reason", + **self.defaults), + submission_id=self.submission.submission_id) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_request_crosslist(self): + """The submitter can request cross-list classification.""" + category = "cs.LO" + with self.app.app_context(): + submission, events = save( + domain.event.RequestCrossList(categories=[category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + # Check the submission state. 
+ with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is announced.") + self.assertTrue(submission.has_active_requests, + "The submission has an active request.") + self.assertEqual(len(submission.pending_user_requests), 1, + "There is one pending user request.") + self.assertIsInstance( + submission.pending_user_requests[0], + domain.submission.CrossListClassificationRequest + ) + self.assertIn(category, + submission.pending_user_requests[0].categories, + "Requested category is set on request.") + self.assertEqual(len(submission.versions), 1, + "There is one announced versions") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is announced") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.REMOVED, + "The second row is in the removed state") + self.assertEqual(db_rows[2].type, + classic.models.Submission.CROSS_LIST, + "The third row has type 'cross'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is in the processing submission" + " state.") diff --git a/core/arxiv/submission/tests/examples/test_10_abandon_submission.py b/core/arxiv/submission/tests/examples/test_10_abandon_submission.py new file mode 100644 index 0000000..ce905d4 --- /dev/null +++ b/core/arxiv/submission/tests/examples/test_10_abandon_submission.py @@ -0,0 +1,682 @@ +"""Example 10: abandoning submissions and requests.""" + +from unittest import TestCase, mock +import tempfile +from datetime import datetime +from pytz import UTC + +from flask import Flask + +from ...services import classic +from ... 
 import save, load, load_fast, domain, exceptions, core + +CCO = 'http://creativecommons.org/publicdomain/zero/1.0/' +TEX = domain.submission.SubmissionContent.Format('tex') + + +class TestAbandonSubmission(TestCase): + """Submitter has started a submission.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create a new, not-yet-finalized submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults) + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_abandon_new_submission(self): + """Submitter abandons new submission.""" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.Rollback(**self.defaults), + submission_id=self.submission.submission_id +
) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.DELETED, + "The submission is DELETED.") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.DELETED, + "The submission is DELETED.") + + # Check the database state. + with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 1, + "There are one rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.USER_DELETED, + "The first row is USER_DELETED") + + +class TestAbandonReplacement(TestCase): + """Submitter has started a replacement and then rolled it back.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), + 
domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, + identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. 
+ self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + self.submission, self.events = save( + domain.event.Rollback(**self.defaults), + submission_id=self.submission.submission_id + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_abandon_replacement_submission(self): + """The replacement is cancelled.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + self.assertEqual(submission.version, 1, "Back to v1") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + self.assertEqual(submission.version, 1, "Back to v1") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_can_start_new_replacement(self): + """The user can start a new replacement.""" + with self.app.app_context(): + submission, events = save( + domain.event.CreateSubmissionVersion(**self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is WORKING.") + self.assertEqual(submission.version, 2, "On to v2") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.WORKING, + "The submission is WORKING.") + self.assertEqual(submission.version, 2, "On to v2") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are three rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.REPLACEMENT, + "The second row has type 'replacement'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + self.assertEqual(db_rows[2].type, + classic.models.Submission.REPLACEMENT, + "The third row has type 'replacement'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.NOT_SUBMITTED, + "The third row is NOT_SUBMITTED") + + +class TestCrossListCancelled(TestCase): + """Submitter has created and cancelled a cross-list request.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + domain.event.CreateSubmission(**self.defaults), 
+ domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, + identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. 
+ self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + # Request cross-list classification + category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestCrossList(categories=[category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + request_id = self.submission.active_user_requests[0].request_id + self.submission, self.events = save( + domain.event.CancelRequest(request_id=request_id, + **self.defaults), + submission_id=self.submission.submission_id + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_request_is_cancelled(self): + """Submitter has cancelled the cross-list request.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_user_can_make_another_request(self): + """User can now make another request.""" + # Request cross-list classification + category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestCrossList(categories=[category], + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.CROSS_LIST, + "The second row has type 'cross'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + self.assertEqual(db_rows[2].type, + classic.models.Submission.CROSS_LIST, + "The third row has type 'cross'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is PROCESSING_SUBMISSION") + + +class TestWithdrawalCancelled(TestCase): + """Submitter has created and cancelled a withdrawal request.""" + + @classmethod + def setUpClass(cls): + """Instantiate an app for use with a SQLite database.""" + _, db = tempfile.mkstemp(suffix='.sqlite') + cls.app = Flask('foo') + cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' + cls.app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with cls.app.app_context(): + classic.init_app(cls.app) + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def setUp(self): + """Create, complete, and publish the submission.""" + self.submitter = domain.agent.User(1234, email='j.user@somewhere.edu', + forename='Jane', surname='User', + endorsements=['cs.DL', 'cs.IR']) + self.defaults = {'creator': self.submitter} + with self.app.app_context(): + classic.create_all() + self.title = "the best title" + self.doi = "10.01234/56789" + self.category = "cs.DL" + self.submission, self.events = save( + 
domain.event.CreateSubmission(**self.defaults), + domain.event.ConfirmContactInformation(**self.defaults), + domain.event.ConfirmAuthorship(**self.defaults), + domain.event.ConfirmPolicy(**self.defaults), + domain.event.SetTitle(title=self.title, **self.defaults), + domain.event.SetLicense(license_uri=CCO, + license_name="CC0 1.0", + **self.defaults), + domain.event.SetPrimaryClassification(category=self.category, + **self.defaults), + domain.event.SetUploadPackage(checksum="a9s9k342900ks03330029", + source_format=TEX, + identifier=123, + uncompressed_size=593992, + compressed_size=593992, + **self.defaults), + domain.event.SetAbstract(abstract="Very abstract " * 20, + **self.defaults), + domain.event.SetComments(comments="Fine indeed " * 10, + **self.defaults), + domain.event.SetJournalReference(journal_ref="Foo 1992", + **self.defaults), + domain.event.SetDOI(doi=self.doi, **self.defaults), + domain.event.SetAuthors(authors_display='Robert Paulson (FC)', + **self.defaults), + domain.event.FinalizeSubmission(**self.defaults) + ) + + # Announce the submission. 
+ self.paper_id = '1901.00123' + with self.app.app_context(): + session = classic.current_session() + db_row = session.query(classic.models.Submission).first() + db_row.status = classic.models.Submission.ANNOUNCED + dated = (datetime.now() - datetime.utcfromtimestamp(0)) + db_row.document = classic.models.Document( + document_id=1, + paper_id=self.paper_id, + title=self.submission.metadata.title, + authors=self.submission.metadata.authors_display, + dated=dated.total_seconds(), + primary_subject_class=self.category, + created=datetime.now(UTC), + submitter_email=self.submission.creator.email, + submitter_id=self.submission.creator.native_id + ) + db_row.doc_paper_id = self.paper_id + session.add(db_row) + session.commit() + + # Request withdrawal of the announced submission + category = "cs.IR" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestWithdrawal(reason='A good reason', + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + request_id = self.submission.active_user_requests[0].request_id + self.submission, self.events = save( + domain.event.CancelRequest(request_id=request_id, + **self.defaults), + submission_id=self.submission.submission_id + ) + + def tearDown(self): + """Clear the database after each test.""" + with self.app.app_context(): + classic.drop_all() + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_request_is_cancelled(self): + """Submitter has cancelled the withdrawal request.""" + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + # Check the database state.
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 2, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.WITHDRAWAL, + "The second row has type 'wdr'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + + @mock.patch(f'{core.__name__}.StreamPublisher', mock.MagicMock()) + def test_user_can_make_another_request(self): + """User can now make another request.""" + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestWithdrawal(reason='A better reason', + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + submission, events = load(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + with self.app.app_context(): + submission = load_fast(self.submission.submission_id) + self.assertEqual(submission.status, + domain.submission.Submission.ANNOUNCED, + "The submission is ANNOUNCED.") + + # Check the database state. 
+ with self.app.app_context(): + session = classic.current_session() + db_rows = session.query(classic.models.Submission) \ + .order_by(classic.models.Submission.submission_id.asc()) \ + .all() + + self.assertEqual(len(db_rows), 3, + "There are two rows in the submission table") + self.assertEqual(db_rows[0].type, + classic.models.Submission.NEW_SUBMISSION, + "The first row has type 'new'") + self.assertEqual(db_rows[0].status, + classic.models.Submission.ANNOUNCED, + "The first row is ANNOUNCED") + self.assertEqual(db_rows[1].type, + classic.models.Submission.WITHDRAWAL, + "The second row has type 'wdr'") + self.assertEqual(db_rows[1].status, + classic.models.Submission.USER_DELETED, + "The second row is USER_DELETED") + self.assertEqual(db_rows[2].type, + classic.models.Submission.WITHDRAWAL, + "The third row has type 'wdr'") + self.assertEqual(db_rows[2].status, + classic.models.Submission.PROCESSING_SUBMISSION, + "The third row is PROCESSING_SUBMISSION") + + with self.app.app_context(): + request_id = self.submission.active_user_requests[-1].request_id + self.submission, self.events = save( + domain.event.CancelRequest(request_id=request_id, + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + self.submission, self.events = save( + domain.event.RequestWithdrawal(reason='A better reason', + **self.defaults), + submission_id=self.submission.submission_id + ) + + with self.app.app_context(): + request_id = self.submission.active_user_requests[-1].request_id + self.submission, self.events = save( + domain.event.CancelRequest(request_id=request_id, + **self.defaults), + submission_id=self.submission.submission_id + ) + submission, events = load(self.submission.submission_id) + self.assertEqual(len(submission.active_user_requests), 0) diff --git a/core/arxiv/submission/tests/schedule/__init__.py b/core/arxiv/submission/tests/schedule/__init__.py new file mode 100644 index 0000000..e69de29 diff --git 
a/core/arxiv/submission/tests/schedule/test_schedule.py b/core/arxiv/submission/tests/schedule/test_schedule.py new file mode 100644 index 0000000..f782980 --- /dev/null +++ b/core/arxiv/submission/tests/schedule/test_schedule.py @@ -0,0 +1,62 @@ +"""Tests for :mod:`.schedule`.""" + +from unittest import TestCase +from datetime import datetime, timedelta +from pytz import timezone, UTC +from ... import schedule + +ET = timezone('US/Eastern') + + +class TestSchedule(TestCase): + """Verify that scheduling functions work as expected.""" + + def test_monday_morning(self): + """E-print was submitted on Monday morning.""" + submitted = ET.localize(datetime(2019, 3, 18, 9, 47, 0)) + self.assertEqual(schedule.next_announcement_time(submitted), + ET.localize(datetime(2019, 3, 18, 20, 0, 0)), + "Will be announced at 8pm this evening") + self.assertEqual(schedule.next_freeze_time(submitted), + ET.localize(datetime(2019, 3, 18, 14, 0, 0)), + "Freeze time is 2pm this afternoon") + + def test_monday_late_afternoon(self): + """E-print was submitted on Monday in the late afternoon.""" + submitted = ET.localize(datetime(2019, 3, 18, 15, 32, 0)) + self.assertEqual(schedule.next_announcement_time(submitted), + ET.localize(datetime(2019, 3, 19, 20, 0, 0)), + "Will be announced at 8pm tomorrow evening") + self.assertEqual(schedule.next_freeze_time(submitted), + ET.localize(datetime(2019, 3, 19, 14, 0, 0)), + "Freeze time is 2pm tomorrow afternoon") + + def test_monday_evening(self): + """E-print was submitted on Monday in the evening.""" + submitted = ET.localize(datetime(2019, 3, 18, 22, 32, 0)) + self.assertEqual(schedule.next_announcement_time(submitted), + ET.localize(datetime(2019, 3, 19, 20, 0, 0)), + "Will be announced at 8pm tomorrow evening") + self.assertEqual(schedule.next_freeze_time(submitted), + ET.localize(datetime(2019, 3, 19, 14, 0, 0)), + "Freeze time is 2pm tomorrow afternoon") + + def test_saturday(self): + """E-print was submitted on a Saturday.""" + submitted = 
ET.localize(datetime(2019, 3, 23, 22, 32, 0)) + self.assertEqual(schedule.next_announcement_time(submitted), + ET.localize(datetime(2019, 3, 25, 20, 0, 0)), + "Will be announced at 8pm next Monday") + self.assertEqual(schedule.next_freeze_time(submitted), + ET.localize(datetime(2019, 3, 25, 14, 0, 0)), + "Freeze time is 2pm next Monday") + + def test_friday_afternoon(self): + """E-print was submitted on a Friday in the early afternoon.""" + submitted = ET.localize(datetime(2019, 3, 22, 13, 32, 0)) + self.assertEqual(schedule.next_announcement_time(submitted), + ET.localize(datetime(2019, 3, 24, 20, 0, 0)), + "Will be announced at 8pm on Sunday") + self.assertEqual(schedule.next_freeze_time(submitted), + ET.localize(datetime(2019, 3, 22, 14, 0, 0)), + "Freeze time is 2pm that same day") diff --git a/core/arxiv/submission/tests/serializer/__init__.py b/core/arxiv/submission/tests/serializer/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/core/arxiv/submission/tests/serializer/test_serializer.py b/core/arxiv/submission/tests/serializer/test_serializer.py new file mode 100644 index 0000000..d1349dd --- /dev/null +++ b/core/arxiv/submission/tests/serializer/test_serializer.py @@ -0,0 +1,151 @@ +from unittest import TestCase +from datetime import datetime +from pytz import UTC +from dataclasses import asdict +import json + +from ...serializer import dumps, loads +from ...domain.event import CreateSubmission, SetTitle +from ...domain.agent import User, System, Client +from ...domain.submission import Submission, SubmissionContent, License, \ + Classification, CrossListClassificationRequest, Hold, Waiver +from ...domain.proposal import Proposal +from ...domain.process import ProcessStatus +from ...domain.annotation import Feature, Comment +from ...domain.flag import ContentFlag + + +class TestDumpLoad(TestCase): + """Tests for :func:`.dumps` and :func:`.loads`.""" + + def test_dump_createsubmission(self): + """Serialize and deserialize a 
:class:`.CreateSubmission` event.""" + user = User('123', 'foo@user.com', 'foouser') + event = CreateSubmission(creator=user, created=datetime.now(UTC)) + data = dumps(event) + self.assertDictEqual(asdict(user), json.loads(data)["creator"], + "User data is fully encoded") + deserialized = loads(data) + self.assertEqual(deserialized, event) + self.assertEqual(deserialized.creator, user) + self.assertEqual(deserialized.created, event.created) + + def test_dump_load_submission(self): + """Serialize and deserialize a :class:`.Submission`.""" + user = User('123', 'foo@user.com', 'foouser') + + client = Client('fooclient', 'asdf') + system = System('testprocess') + submission = Submission( + creator=user, + owner=user, + client=client, + created=datetime.now(UTC), + updated=datetime.now(UTC), + submitted=datetime.now(UTC), + source_content=SubmissionContent( + identifier='12345', + checksum='asdf1234', + uncompressed_size=435321, + compressed_size=23421, + source_format=SubmissionContent.Format.TEX + ), + primary_classification=Classification(category='cs.DL'), + secondary_classification=[Classification(category='cs.AI')], + submitter_contact_verified=True, + submitter_is_author=True, + submitter_accepts_policy=True, + submitter_confirmed_preview=True, + license=License('http://foolicense.org/v1', 'The Foo License'), + status=Submission.ANNOUNCED, + arxiv_id='1234.56789', + version=2, + user_requests={ + 'asdf1234': CrossListClassificationRequest('asdf1234', user) + }, + proposals={ + 'prop1234': Proposal( + event_id='prop1234', + creator=user, + proposed_event_type=SetTitle, + proposed_event_data={'title': 'foo title'} + ) + }, + processes=[ + ProcessStatus( + creator=system, + created=datetime.now(UTC), + status=ProcessStatus.Status.SUCCEEDED, + process='FooProcess' + ) + ], + annotations={ + 'asdf123543': Feature( + event_id='asdf123543', + created=datetime.now(UTC), + creator=system, + feature_type=Feature.Type.PAGE_COUNT, + feature_value=12345678.32 + ) + }, + 
flags={ + 'fooflag1': ContentFlag( + event_id='fooflag1', + creator=system, + created=datetime.now(UTC), + flag_type=ContentFlag.Type.LOW_STOP, + flag_data=25, + comment='no comment' + ) + }, + comments={ + 'asdf54321': Comment( + event_id='asdf54321', + creator=system, + created=datetime.now(UTC), + body='here is comment' + ) + }, + holds={ + 'foohold1234': Hold( + event_id='foohold1234', + creator=system, + hold_type=Hold.Type.SOURCE_OVERSIZE, + hold_reason='the best reason' + ) + }, + waivers={ + 'waiver1234': Waiver( + event_id='waiver1234', + waiver_type=Hold.Type.SOURCE_OVERSIZE, + waiver_reason='it is ok', + created=datetime.now(UTC), + creator=system + ) + } + ) + raw = dumps(submission) + loaded = loads(raw) + + self.assertEqual(submission.creator, loaded.creator) + self.assertEqual(submission.owner, loaded.owner) + self.assertEqual(submission.client, loaded.client) + self.assertEqual(submission.created, loaded.created) + self.assertEqual(submission.updated, loaded.updated) + self.assertEqual(submission.submitted, loaded.submitted) + self.assertEqual(submission.source_content, loaded.source_content) + self.assertEqual(submission.source_content.source_format, + loaded.source_content.source_format) + self.assertEqual(submission.primary_classification, + loaded.primary_classification) + self.assertEqual(submission.secondary_classification, + loaded.secondary_classification) + self.assertEqual(submission.license, loaded.license) + self.assertEqual(submission.user_requests, loaded.user_requests) + self.assertEqual(submission.proposals, loaded.proposals) + self.assertEqual(submission.processes, loaded.processes) + self.assertEqual(submission.annotations, loaded.annotations) + self.assertEqual(submission.flags, loaded.flags) + self.assertEqual(submission.comments, loaded.comments) + self.assertEqual(submission.holds, loaded.holds) + self.assertEqual(submission.waivers, loaded.waivers) diff --git a/core/arxiv/submission/tests/util.py 
b/core/arxiv/submission/tests/util.py new file mode 100644 index 0000000..439190c --- /dev/null +++ b/core/arxiv/submission/tests/util.py @@ -0,0 +1,25 @@ +from typing import Optional +from contextlib import contextmanager + +from flask import Flask + +from ..services import classic + + +@contextmanager +def in_memory_db(app: Optional[Flask] = None): + """Provide an in-memory sqlite database for testing purposes.""" + if app is None: + app = Flask('foo') + app.config['CLASSIC_DATABASE_URI'] = 'sqlite://' + app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False + + with app.app_context(): + classic.init_app(app) + classic.create_all() + try: + yield classic.current_session() + except Exception: + raise + finally: + classic.drop_all() diff --git a/core/events/__init__.py b/core/events/__init__.py deleted file mode 100644 index 5a38aa3..0000000 --- a/core/events/__init__.py +++ /dev/null @@ -1,289 +0,0 @@ -""" -Core event-centric data abstraction for the submission & moderation subsystem. - -This package provides an event-based API for CRUD operations on submissions -and submission-related (meta)data. Management of submission content (i.e. -source files) is out of scope. - -Rather than perform CRUD operations directly on submission objects, all -operations that modify submission data are performed through the creation of -submission events. This ensures that we have a precise and complete record of -activities concerning submissions, and provides an explicit definition of -operations that can be performed within the arXiv submission system. - -Event classes are defined in :mod:`events.domain.event`, and are accessible -from the root namespace of this package. Each event type defines a -transformation/operation on a single submission, and defines the data required -to perform that operation. Events are played forward, in order, to derive the -state of a submission. For more information about how event types are defined, -see :class:`events.domain.event.Event`. 
- -Using events -============ - -Event types are `PEP 557 data classes -`_. Each event type inherits from -:class:`.Event`, and may add additional fields. See :class:`.Event` for more -information about common fields. - -To create a new event, initialize the class with the relevant -data, and commit the event using :func:`.save`. For example: - -.. code-block:: python - - >>> import events - >>> user = events.User(123, "joe@bloggs.com") - >>> metadata = [('title', 'A new theory of foo')] - >>> update = events.UpdateMetadata(creator=user, metadata=metadata) - >>> submission = events.save(creation, submission_id=12345) - - -Several things will occur: - -1. If the events are for a submission that already exists, the latest state of - that submission will be obtained. -2. New events will be validated and applied to the submission in the order that - they were passed to :func:`.save`. If an event is invalid (e.g. the - submission is not in an appropriate state for the operation), an - :class:`.InvalidEvent` exception will be raised. Note that at this point - nothing has been changed in the database; the attempt is simply abandoned. -3. The new events are stored in the database, as is the latest state of the - submission. Creation of events and creation/update of the submission are - performed as a single atomic transaction. If anything goes wrong during the - update operation, all changes are abandoned and a :class:`.RuntimeError` - exception is raised. -4. If the notification service is configured, a message about the event is - propagated as a Kinesis event on the configured stream. See - :mod:`events.services.notification` for details. - - -Special case: creation ----------------------- -Note that if the first event is a :class:`.CreateSubmission` the -submission ID need not be provided, as we won't know what it is yet. For -example: - -.. 
code-block:: python - - import events - - >>> user = events.User(123, "joe@bloggs.com") - >>> creation = events.CreateSubmission(creator=user) - >>> metadata = [('title', 'A new theory of foo')] - >>> update = events.UpdateMetadata(creator=user, metadata=metadata) - >>> submission = events.save(creation, update) - >>> submission.submission_id - 40032 - - -""" - -from typing import Optional, List, Tuple -from arxiv.base import logging -from events.domain.submission import Submission, SubmissionMetadata, Author -from events.domain.agent import Agent, User, System, Client -from events.domain.event import ( - Event, CreateSubmission, RemoveSubmission, VerifyContactInformation, - AssertAuthorship, AcceptPolicy, SetPrimaryClassification, UpdateMetadata, - AddSecondaryClassification, RemoveSecondaryClassification, SelectLicense, - AttachSourceContent, UpdateAuthors, CreateComment, DeleteComment, - AddDelegate, RemoveDelegate, FinalizeSubmission, UnFinalizeSubmission -) -from events.domain.rule import RuleCondition, RuleConsequence, EventRule -from events.services import classic -from events.exceptions import InvalidEvent, InvalidStack, NoSuchSubmission, SaveError - -logger = logging.getLogger(__name__) - - -def load(submission_id: str) -> Tuple[Submission, List[Event]]: - """ - Load a submission and its history. - - Parameters - ---------- - submission_id : str - Submission identifier. - - Returns - ------- - :class:`events.domain.submission.Submission` - The current state of the submission. - list - Items are :class:`.Event`s, in order of their occurrence. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised when a submission with the passed ID cannot be found. 
- """ - try: - return classic.get_submission(submission_id) - except classic.NoSuchSubmission as e: - raise NoSuchSubmission(f'No submission with id {submission_id}') from e - - -def save(*events: Event, submission_id: Optional[str] = None) \ - -> Tuple[Submission, List[Event]]: - """ - Commit a set of new :class:`.Event`s for a submission. - - This will persist the events to the database, along with the final - state of the submission, and generate external notification(s) on the - appropriate channels. - - Parameters - ---------- - events : :class:`.Event` - Events to apply and persist. - submission_id : int - The unique ID for the submission, if available. If not provided, it is - expected that ``events`` includes a :class:`.CreateSubmission`. - - Returns - ------- - :class:`events.domain.submission.Submission` - The state of the submission after all events (including rule-derived - events) have been applied. Updated with the submission ID, if a - :class:`.CreateSubmission` was included. - list - A list of :class:`.Event` instances applied to the submission. Note - that this list may contain more events than were passed, if event - rules were triggered. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised if ``submission_id`` is not provided and the first event is not - a :class:`.CreateSubmission`, or ``submission_id`` is provided but - no such submission exists. - :class:`.InvalidEvent` - If an invalid event is encountered, the entire operation is aborted - and this exception is raised. - :class:`.SaveError` - There was a problem persisting the events and/or submission state - to the database. - - """ - if len(events) == 0: - raise ValueError('Must pass at least one event') - - # Do some sanity checks before proceeding. 
- for event in events: - if submission_id is not None: - if event.submission_id is None: - event.submission_id = submission_id - if event.submission_id != submission_id: - raise InvalidEvent(event, - "Can't mix events for multiple submissions") - - # We want to play events from the beginning. - if submission_id is not None: - existing_events = classic.get_events(submission_id) - else: - existing_events = [] - combined = existing_events + list(events) - - # Load any relevant event rules for this submission. - rules = [] # database.get_rules(submission_id) - - # Calculate the state of the submission from old and new events. - submission, combined = _apply_events(combined, rules) - - # Update the submission ID to ensure the existing submission is updated. - if submission.submission_id is None: - submission.submission_id = submission_id # May still be None. - - # Persist in database; submission ID is updated after transaction. - try: - submission = classic.store_events(*combined, submission=submission) - except classic.CommitFailed as e: - logger.debug('Encountered CommitFailed exception: %s', str(e)) - raise SaveError('Failed to store events') from e - - for event in combined: - event.submission_id = submission.submission_id - return submission, combined - - -def _apply_rules(submission: Submission, event: Event, - rules: List[EventRule]) -> List[Event]: - """Generate new event(s) by applying rules to a submission event.""" - def _apply(rule: EventRule) -> bool: - return rule.condition(submission, event) - return [ - rule.consequence(submission, event) for rule in filter(_apply, rules) - ] - - -def _apply_events(events: List[Event], rules: List[EventRule], - submission: Optional[Submission] = None) \ - -> Tuple[Submission, List[Event]]: - """ - Apply a set of events in order. - - Parameters - ---------- - events : list - Items are :class:`.Event` instances. - rules : list - Items are :class:`.EventRule` instances. 
- submission : :class:`.Submission` or None - Starting state from which to begin applying ``events``. If - ``submission`` is not provided, ``events`` must contain a - :class:`.CreateSubmission`. - - Returns - ------- - :class:`.Submission` - Submission state after events have been applied. - list - Items are :class:`.Event`s that have been applied, including any - additional events generated by ``rules``. - - Raises - ------ - :class:`.NoSuchSubmission` - If ``submission`` is not provided, and the first event is not a - :class:`.CreateSubmission`, there's not much else to go on. - :class:`.InvalidStack` - If at least one invalid event is encountered, this exception is raised. - - """ - events = sorted(events, key=lambda e: e.created) - - # Need either a creation event or a submission state from which to start. - if not isinstance(events[0], CreateSubmission) and submission is None: - raise NoSuchSubmission('No creation, and submission not provided') - - extra_events: List[Event] = [] # Generated by applied rules. - - invalid_events: List[InvalidEvent] = [] - for event in events: - try: - event.validate(submission) # Will throw InvalidEvent. - except InvalidEvent as e: - invalid_events.append(e) - continue - - if isinstance(event, CreateSubmission): - submission = event.apply() - else: - submission = event.apply(submission) - - if not event.committed: # Don't create duplicate rule-derived events. - # Any rule-derived events should be applied before moving on. 
- _extra = _apply_rules(submission, event, rules) - if len(_extra) > 0: - try: - submission, _extra = _apply_events(_extra, rules, submission) - extra_events += _extra - except InvalidStack as e: - # TODO: Handle merging of stacks - invalid_events.extend(e.event_exceptions) - - if invalid_events: - raise InvalidStack(invalid_events) - - return submission, sorted(events + extra_events, key=lambda e: e.created) - diff --git a/core/events/domain/__init__.py b/core/events/domain/__init__.py deleted file mode 100644 index 359d068..0000000 --- a/core/events/domain/__init__.py +++ /dev/null @@ -1,6 +0,0 @@ -"""Core data structures for the submission and moderation system.""" - -from .submission import Submission, License, SubmissionMetadata, \ - Classification, Author -from .agent import User, System, Client, Agent -from .event import event_factory, Event diff --git a/core/events/domain/agent.py b/core/events/domain/agent.py deleted file mode 100644 index c1bb340..0000000 --- a/core/events/domain/agent.py +++ /dev/null @@ -1,138 +0,0 @@ -"""Data structures for agents.""" - -import hashlib -from typing import Any, Optional - -from dataclasses import dataclass, field -from dataclasses import asdict - -__all__ = ('Agent', 'User', 'System', 'Client', 'agent_factory') - - -@dataclass -class Agent: - """ - Base class for agents in the submission system. - - An agent is an actor/system that generates/is responsible for events. - """ - - native_id: str - """Type-specific identifier for the agent. This might be an URI.""" - - FIELDS = [] - - @property - def agent_type(self): - """The name of the agent instance's class.""" - return self.get_agent_type() - - @classmethod - def get_agent_type(cls): - """Get the name of the instance's class.""" - return cls.__name__ - - @property - def agent_identifier(self): - """ - Unique identifier for the agent instance. - - Based on both the agent type and native ID. 
- """ - h = hashlib.new('sha1') - h.update(b'%s:%s' % (self.agent_type.encode('utf-8'), - str(self.native_id).encode('utf-8'))) - return h.hexdigest() - - @classmethod - def from_dict(cls, data: dict) -> Any: - """Instantiate an :class:`.Agent` instance from a dict.""" - agent_type = data.pop('agent_type', None) - native_id = data.pop('native_id', None) - if agent_type is None and type(cls) is Agent: - raise ValueError('agent_type not provided') - return agent_factory(agent_type, native_id, **data) - - def __eq__(self, other: Any) -> bool: - """Equality comparison for agents based on type and identifier.""" - if not isinstance(other, self.__class__): - return False - return self.agent_identifier == other.agent_identifier - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Agent`.""" - return { - 'native_id': self.native_id, - 'agent_type': self.agent_type - } - - -@dataclass -class User(Agent): - """An (human) end user.""" - - email: str - forename: str = field(default_factory=str) - surname: str = field(default_factory=str) - suffix: str = field(default_factory=str) - identifier: Optional[str] = field(default=None) - affiliation: str = field(default_factory=str) - - FIELDS = [ - 'email', 'forename', 'surname', 'suffix', 'identifier', 'affiliation' - ] - - @property - def name(self): - """Full name of the user.""" - return f"{self.forename} {self.surname} {self.suffix}" - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.User`.""" - data = super(User, self).to_dict() - data['name'] = self.name - data['forename'] = self.forename - data['surname'] = self.surname - data['suffix'] = self.suffix - data['email'] = self.email - data['identifier'] = self.identifier - data['affiliation'] = self.affiliation - data['user_id'] = self.native_id - return data - - -# TODO: extend this to support arXiv-internal services. 
-@dataclass -class System(Agent): - """The submission application (this application).""" - - FIELDS = [] - - -@dataclass -class Client(Agent): - """A non-human third party, usually an API client.""" - - FIELDS = [] - - def to_dict(self): - """Generate a dict representation of this :class:`.Client` instance.""" - return { - 'client_id': self.native_id - } - - -_agent_types = { - User.get_agent_type(): User, - System.get_agent_type(): System, - Client.get_agent_type(): Client, -} - - -def agent_factory(agent_type: str, native_id: Any, **extra) -> Agent: - """Instantiate a subclass of :class:`.Agent`.""" - if agent_type not in _agent_types: - raise ValueError(f'No such agent type: {agent_type}') - klass = _agent_types[agent_type] - extra = {k: v for k, v in extra.items() if k in klass.FIELDS} - return klass(native_id=native_id, **extra) diff --git a/core/events/domain/event.py b/core/events/domain/event.py deleted file mode 100644 index 16ec3a4..0000000 --- a/core/events/domain/event.py +++ /dev/null @@ -1,727 +0,0 @@ -""" -Data structures for submissions events. - -- Events have unique identifiers generated from their data (creation, agent, - submission). -- Events provide methods to update a submission based on the event data. -- Events provide validation methods for event data. - -Writing new events/commands -=========================== - -Events/commands are implemented as classes that inherit from :class:`.Event`. -It should: - -- Be a dataclass (i.e. be decorated with :func:`dataclasses.dataclass`). -- Define (using :func:`dataclasses.field`) associated data. -- Implement a validation method with the signature - ``validate(self, submission: Submission) -> None`` (see below). -- Implement a projection method with the signature - ``project(self, submission: Submission) -> Submission:`` that mutates - the passed :class:`.Submission` instance. -- Be fully documented. 
Be sure that the class docstring fully describes the - meaning of the event/command, and that both public and private methods have - at least a summary docstring. -- Have a corresponding :class:`unittest.TestCase` in - :mod:`events.domain.tests.test_events`. - -Adding validation to events -=========================== - -Each command/event class should implement an instance method -``validate(self, submission: Submission) -> None`` that raises -:class:`.InvalidEvent` exceptions if the data on the event instance is not -valid. - -For clarity, it's a good practice to individuate validation steps as separate -private instance methods, and call them from the public ``validate`` method. -This makes it easier to identify which validation criteria are being applied, -in what order, and what those criteria mean. - -See :class:`.SetPrimaryClassification` for an example. - -We could consider standalone validation functions for validation checks that -are performed on several event types (instead of just private instance -methods). - -""" - -import hashlib -import re -from datetime import datetime -from typing import Optional, TypeVar, List, Tuple, Any, Dict -from urllib.parse import urlparse -from dataclasses import dataclass, field -from dataclasses import asdict - -from arxiv.util import schema -from arxiv import taxonomy - -from .agent import Agent -from .submission import Submission, SubmissionMetadata, Author, \ - Classification, License, Delegation, Comment, Flag, Proposal, \ - SubmissionContent - -from events.exceptions import InvalidEvent - - -@dataclass -class Event: - """Base class for submission-related events.""" - - creator: Agent - """ - The agent responsible for the operation represented by this event. - - This is **not** necessarily the creator of the submission. - """ - - created: datetime = field(default_factory=datetime.now) - """ - The timestamp when the event was originally committed. - - This should generally not be set from outside this package. 
- """ - - proxy: Optional[Agent] = field(default=None) - """ - The agent who facilitated the operation on behalf of the :prop:`.creator`. - - This may be an API client, or another user who has been designated as a - proxy. Note that proxy implies that the creator was not directly involved. - """ - - client: Optional[Agent] = field(default=None) - """ - The client through which the :prop:`.creator` performed the operation. - - If the creator was directly involved in the operation, this property should - be the client that facilitated the operation. - """ - - submission_id: Optional[int] = field(default=None) - """ - The primary identifier of the submission being operated upon. - - This is defined as optional to support creation events, and to facilitate - chaining of events with creation events in the same transaction. - """ - - committed: bool = field(default=False) - """ - Indicates whether the event has been committed to the database. - - This should generally not be set from outside this package. 
- """ - - @property - def event_type(self) -> str: - """The name (str) of the event type.""" - return self.get_event_type() - - @classmethod - def get_event_type(cls) -> str: - """Get the name (str) of the event type.""" - return cls.__name__ - - @property - def event_id(self) -> str: - """The unique ID for this event.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.created.isoformat().encode('utf-8'), - self.event_type.encode('utf-8'), - self.creator.agent_identifier.encode('utf-8'))) - return h.hexdigest() - - def apply(self, submission: Optional[Submission] = None) -> Submission: - """Apply the projection for this :class:`.Event` instance.""" - if submission: - submission = self.project(submission) - else: - submission = self.project() - submission.updated = self.created - return submission - - def to_dict(self): - """Generate a dict representation of this :class:`.Event`.""" - data = asdict(self) - data.update({ - 'creator': self.creator.to_dict(), - 'proxy': self.proxy.to_dict() if self.proxy else None, - 'client': self.client.to_dict() if self.client else None, - 'created': self.created.isoformat(), - }) - return data - - -# Events related to the creation of a new submission. -# -# These are largely the domain of the metadata API, and the submission UI. 
- - -@dataclass(init=False) -class CreateSubmission(Event): - """Creation of a new :class:`events.domain.submission.Submission`.""" - - def validate(self, *args, **kwargs) -> None: - """Validate creation of a submission.""" - return - - def project(self) -> Submission: - """Create a new :class:`.Submission`.""" - return Submission(creator=self.creator, created=self.created, - owner=self.creator, proxy=self.proxy, - client=self.client) - - -@dataclass(init=False) -class RemoveSubmission(Event): - """Removal of a :class:`events.domain.submission.Submission`.""" - - def validate(self, submission: Submission) -> None: - """Validate removal of a submission.""" - return - - def project(self, submission: Submission) -> Submission: - """Remove the :class:`.Submission` from the system (set inactive).""" - submission.active = False - return submission - - -@dataclass(init=False) -class VerifyContactInformation(Event): - """Submitter has verified their contact information.""" - - def validate(self, submission: Submission) -> None: - """Cannot apply to a finalized submission.""" - submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Update :prop:`.Submission.submitter_contact_verified`.""" - submission.submitter_contact_verified = True - return submission - - -@dataclass -class AssertAuthorship(Event): - """The submitting user asserts whether they are an author of the paper.""" - - submitter_is_author: bool = True - - def validate(self, submission: Submission) -> None: - """Cannot apply to a finalized submission.""" - submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Update the authorship flag on the submission.""" - submission.submitter_is_author = self.submitter_is_author - return submission - - -@dataclass -class AcceptPolicy(Event): - """The submitting user accepts the arXiv submission policy.""" - - def validate(self, submission: Submission) -> None: - 
"""Cannot apply to a finalized submission.""" - submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Set the policy flag on the submission.""" - submission.submitter_accepts_policy = True - return submission - - -@dataclass -class SetPrimaryClassification(Event): - """Update the primary classification of a submission.""" - - category: Optional[str] = None - - def validate(self, submission: Submission) -> None: - """Validate the primary classification category.""" - self._must_be_a_valid_category(submission) - self._primary_cannot_be_secondary(submission) - submission_is_not_finalized(self, submission) - - def _must_be_a_valid_category(self, submission: Submission) -> None: - """Valid arXiv categories are defined in :mod:`arxiv.taxonomy`.""" - if not self.category or self.category not in taxonomy.CATEGORIES: - raise InvalidEvent(self, f"Not a valid category: {self.category}") - - def _primary_cannot_be_secondary(self, submission: Submission) -> None: - """The same category can't be used for both primary and secondary.""" - secondaries = [c.category for c in submission.secondary_classification] - if self.category in secondaries: - raise InvalidEvent(self, - "The same category cannot be used as both the" - " primary and a secondary category.") - - def project(self, submission: Submission) -> Submission: - """Set :prop:`.Submission.primary_classification`.""" - clsn = Classification(category=self.category) - submission.primary_classification = clsn - return submission - - -@dataclass -class AddSecondaryClassification(Event): - """Add a secondary :class:`.Classification` to a submission.""" - - category: Optional[str] = field(default=None) - - def validate(self, submission: Submission) -> None: - """Validate the secondary classification category to add.""" - self._must_be_a_valid_category(submission) - self._primary_cannot_be_secondary(submission) - self._must_not_already_be_present(submission) - 
submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Add a :class:`.Classification` as a secondary classification.""" - classification = Classification(category=self.category) - submission.secondary_classification.append(classification) - return submission - - def _must_be_a_valid_category(self, submission: Submission) -> None: - """Valid arXiv categories are defined in :mod:`arxiv.taxonomy`.""" - if not self.category or self.category not in taxonomy.CATEGORIES: - raise InvalidEvent(self, "Not a valid category") - - def _primary_cannot_be_secondary(self, submission: Submission) -> None: - """The same category can't be used for both primary and secondary.""" - if submission.primary_classification is None: - return - if self.category == submission.primary_classification.category: - raise InvalidEvent(self, - "The same category cannot be used as both the" - " primary and a secondary category.") - - def _must_not_already_be_present(self, submission: Submission) -> None: - """The same category cannot be added as a secondary twice.""" - secondaries = [c.category for c in submission.secondary_classification] - if self.category in secondaries: - raise InvalidEvent(self, - f"Secondary {self.category} already set" - f" on this submission.") - - -@dataclass -class RemoveSecondaryClassification(Event): - """Remove secondary :class:`.Classification` from submission.""" - - category: Optional[str] = field(default=None) - - def validate(self, submission: Submission) -> None: - """Validate the secondary classification category to remove.""" - self._must_be_a_valid_category(submission) - self._must_already_be_present(submission) - submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Remove from :prop:`.Submission.secondary_classification`.""" - submission.secondary_classification = [ - classn for classn in submission.secondary_classification - if not classn.category 
== self.category - ] - return submission - - def _must_be_a_valid_category(self, submission: Submission) -> None: - """Valid arXiv categories are defined in :mod:`arxiv.taxonomy`.""" - if not self.category or self.category not in taxonomy.CATEGORIES: - raise InvalidEvent(self, "Not a valid category") - - def _must_already_be_present(self, submission: Submission) -> None: - """One cannot remove a secondary that is not actually set.""" - current = [c.category for c in submission.secondary_classification] - if self.category not in current: - raise InvalidEvent(self, 'No such category on submission') - - -@dataclass -class SelectLicense(Event): - """The submitter has selected a license for their submission.""" - - license_name: Optional[str] = field(default=None) - license_uri: Optional[str] = field(default=None) - - def validate(self, submission: Submission) -> None: - """Validate the selected license.""" - submission_is_not_finalized(self, submission) - - def project(self, submission: Submission) -> Submission: - """Set :prop:`.Submission.license`.""" - submission.license = License( - name=self.license_name, - uri=self.license_uri - ) - return submission - - -# TODO: consider representing some of these as distinct events/commands? -@dataclass -class UpdateMetadata(Event): - """Update the descriptive metadata for a submission.""" - - metadata: List[Tuple[str, Any]] = field(default_factory=list) - - FIELDS = [ - 'title', 'abstract', 'doi', 'msc_class', 'acm_class', - 'report_num', 'journal_ref' - ] - - # TODO: implement more specific validation here. 
- def validate(self, submission: Submission) -> None: - """The :prop:`.metadata` should be a list of tuples.""" - submission_is_not_finalized(self, submission) - try: - assert len(self.metadata) >= 1 - assert type(self.metadata[0]) in [tuple, list] - for metadatum in self.metadata: - assert len(metadatum) == 2 - except AssertionError as e: - raise InvalidEvent(self) from e - - def project(self, submission: Submission) -> Submission: - """Update metadata on a :class:`.Submission`.""" - for key, value in self.metadata: - setattr(submission.metadata, key, value) - return submission - - -@dataclass -class UpdateAuthors(Event): - """Update the authors on a :class:`.Submission`.""" - - authors: List[Author] = field(default_factory=list) - authors_display: Optional[str] = field(default=None) - """The authors string may be provided.""" - - def __post_init__(self): - """Autogenerate and/or clean display names.""" - if not self.authors_display: - self.authors_display = self._canonical_author_string() - self.authors_display = self._cleanup(self.authors_display) - - def validate(self, submission: Submission) -> None: - """May not apply to a finalized submission.""" - submission_is_not_finalized(self, submission) - self._does_not_contain_et_al() - - def _canonical_author_string(self) -> str: - """Canonical representation of authors, using display names.""" - return ", ".join([au.display for au in self.authors]) - - def _cleanup(self, s: str) -> str: - """Perform some light tidying on the provided author string(s).""" - s = re.sub(r"\s+", " ", s) # Single spaces only. - s = re.sub(r",(\s*,)+", ",", s) # Remove double commas. - # Add spaces between word and opening parenthesis. - s = re.sub(r"(\w)\(", "\g<1> (", s) - # Add spaces between closing parenthesis and word. - s = re.sub(r"\)(\w)", ") \g<1>", s) - # Change capitalized or uppercase `And` to `and`. - s = re.sub(r"\bA(?i:ND)\b", "and", s) - return s.strip() # Removing leading and trailing whitespace. 
- - def _does_not_contain_et_al(self) -> None: - """The authors display value should not contain `et al`.""" - if self.authors_display and \ - re.search(r"et al\.?($|\s*\()", self.authors_display): - raise InvalidEvent(self, "Authors should not contain et al.") - - def project(self, submission: Submission) -> Submission: - """Replace :prop:`.Submission.metadata.authors`.""" - submission.metadata.authors = self.authors - submission.metadata.authors_display = self.authors_display - return submission - - @classmethod - def from_dict(cls, **data) -> Submission: - """Override the default ``from_dict`` constructor to handle authors.""" - if 'authors' not in data: - raise ValueError('Missing authors') - data['authors'] = [Author(**au) for au in data['authors']] - return cls(**data) - - -@dataclass -class AttachSourceContent(Event): - """Add metadata about a source package to a submission.""" - - location: str = field(default_factory=str) - format: str = field(default_factory=str) - checksum: str = field(default_factory=str) - mime_type: str = field(default_factory=str) - # TODO: Examine the necessity of an identifier when we are storing URIs. - identifier: Optional[int] = field(default=None) - size: int = field(default=0) - - # TODO: This should be configurable somewhere. 
- ALLOWED_FORMATS = [ - 'pdftex', 'tex', 'pdf', 'ps', 'html', 'invalid' - ] - ALLOWED_MIME_TYPES = [ - 'application/tar+gzip', 'application/tar', 'application/zip' - ] - - def validate(self, submission: Submission) -> None: - """Validate data for :class:`.SubmissionContent`.""" - submission_is_not_finalized(self, submission) - try: - parsed = urlparse(self.location) - except ValueError as e: - raise InvalidEvent(self, 'Not a valid URL') from e - if not parsed.netloc.endswith('arxiv.org'): - raise InvalidEvent(self, 'External URLs not allowed.') - - if self.format not in self.ALLOWED_FORMATS: - raise InvalidEvent(self, f'Format {self.format} not allowed') - if not self.checksum: - raise InvalidEvent(self, 'Missing checksum') - if not self.identifier: - raise InvalidEvent(self, 'Missing upload ID') - - def project(self, submission: Submission) -> Submission: - """Replace :class:`.SubmissionContent` metadata on the submission.""" - submission.source_content = SubmissionContent( - location=self.location, - format=self.format, - checksum=self.checksum, - identifier=self.identifier, - mime_type=self.mime_type, - size=self.size - ) - return submission - - -@dataclass -class FinalizeSubmission(Event): - """Send the submission to the queue for announcement.""" - - REQUIRED = [ - 'creator', 'primary_classification', 'submitter_contact_verified', - 'submitter_accepts_policy', 'license', 'source_content', 'metadata', - ] - REQUIRED_METADATA = ['title', 'abstract', 'authors_display'] - - def validate(self, submission: Submission) -> None: - """Ensure that all required data/steps are complete.""" - if submission.finalized: - raise InvalidEvent(self, "Submission already finalized") - if not submission.active: - raise InvalidEvent(self, "Submision must be active") - self._required_fields_are_complete(submission) - - def project(self, submission: Submission) -> Submission: - """Set :prop:`Submission.finalized`.""" - submission.finalized = True - return submission - - def 
_required_fields_are_complete(self, submission: Submission) -> None: - """Verify that all required fields are complete.""" - for key in self.REQUIRED: - if not getattr(submission, key): - raise InvalidEvent(self, f"Missing {key}") - for key in self.REQUIRED_METADATA: - if not getattr(submission.metadata, key): - raise InvalidEvent(self, f"Missing {key}") - - -@dataclass -class UnFinalizeSubmission(Event): - """Withdraw the submission from the queue for announcement.""" - - def validate(self, submission: Submission) -> None: - """Validate the unfinalize action.""" - self._must_be_finalized(submission) - - def _must_be_finalized(self, submission: Submission) -> None: - """May only unfinalize a finalized submission.""" - if not submission.finalized: - raise InvalidEvent(self, "Submission is not finalized") - - def project(self, submission: Submission) -> Submission: - """Set :prop:`Submission.finalized`.""" - submission.finalized = False - submission.status = Submission.WORKING - return submission - - -# Moderation-related events. 
- - -@dataclass -class CreateComment(Event): - """Creation of a :class:`.Comment` on a :class:`.Submission`.""" - - read_scope = 'submission:moderate' - write_scope = 'submission:moderate' - - body: str = field(default_factory=str) - scope: str = 'private' - - def validate(self, submission: Submission) -> None: - """The :prop:`.body` should be set.""" - if not self.body: - raise ValueError('Comment body not set') - - def project(self, submission: Submission) -> Submission: - """Create a new :class:`.Comment` and attach it to the submission.""" - comment = Comment(creator=self.creator, created=self.created, - proxy=self.proxy, submission=submission, - body=self.body, scope=self.scope) - submission.comments[comment.comment_id] = comment - return submission - - -@dataclass -class DeleteComment(Event): - """Deletion of a :class:`.Comment` on a :class:`.Submission`.""" - - read_scope = 'submission:moderate' - write_scope = 'submission:moderate' - - comment_id: str = field(default_factory=str) - - def validate(self, submission: Submission) -> None: - """The :prop:`.comment_id` must present on the submission.""" - if self.comment_id is None: - raise InvalidEvent(self, 'comment_id is required') - if not hasattr(submission, 'comments') or not submission.comments: - raise InvalidEvent(self, 'Cannot delete comment that does not exist') - if self.comment_id not in submission.comments: - raise InvalidEvent(self, 'Cannot delete comment that does not exist') - - def project(self, submission: Submission) -> Submission: - """Remove the comment from the submission.""" - del submission.comments[self.comment_id] - return submission - - -@dataclass -class AddDelegate(Event): - """Owner delegates authority to another agent.""" - - delegate: Optional[Agent] = None - - def validate(self, submission: Submission) -> None: - """The event creator must be the owner of the submission.""" - if not self.creator == submission.owner: - raise InvalidEvent(self, 'Event creator must be submission 
owner') - - def project(self, submission: Submission) -> Submission: - """Add the delegate to the submission.""" - delegation = Delegation( - creator=self.creator, - delegate=self.delegate, - created=self.created - ) - submission.delegations[delegation.delegation_id] = delegation - return submission - - -@dataclass -class RemoveDelegate(Event): - """Owner revokes authority from another agent.""" - - delegation_id: str = field(default_factory=str) - - def validate(self, submission: Submission) -> None: - """The event creator must be the owner of the submission.""" - if not self.creator == submission.owner: - raise InvalidEvent(self, 'Event creator must be submission owner') - - def project(self, submission: Submission) -> Submission: - """Remove the delegate from the submission.""" - if self.delegation_id in submission.delegations: - del submission.delegations[self.delegation_id] - return submission - - -# class CreateSourcePackage(Event): -# pass -# -# class UpdateSourcePackage(Event): -# pass -# -# -# class DeleteSourcePackage(Event): -# pass -# -# -# class Annotation(Event): -# pass -# -# -# class CreateFlagEvent(AnnotationEvent): -# pass -# -# -# class DeleteFlagEvent(AnnotationEvent): -# pass -# -# -# class DeleteCommentEvent(AnnotationEvent): -# pass -# -# -# class CreateProposalEvent(AnnotationEvent): -# pass -# -# -# class DeleteProposalEvent(AnnotationEvent): -# pass - -EVENT_TYPES = { - obj.get_event_type(): obj for obj in locals().values() - if type(obj) is type and issubclass(obj, Event) -} - - -def event_factory(event_type: str, **data) -> Event: - """ - Convenience factory for generating :class:`.Event`s. - - Parameters - ---------- - event_type : str - Should be the name of a :class:`.Event` subclass. - data : kwargs - Keyword parameters passed to the event constructor. - - Return - ------ - :class:`.Event` - An instance of an :class:`.Event` subclass. 
- """ - if 'created' not in data: - data['created'] = datetime.now() - if event_type in EVENT_TYPES: - klass = EVENT_TYPES[event_type] - if hasattr(klass, 'from_dict'): - return klass.from_dict(**data) - return EVENT_TYPES[event_type](**data) - raise RuntimeError('Unknown event type: %s' % event_type) - - -# General-purpose validators go down here. -# TODO: should these be in a sub-module? This file is getting big. - -def submission_is_not_finalized(event: Event, submission: Submission) -> None: - """ - Verify that the submission is not finalized. - - Parameters - ---------- - event : :class:`.Event` - submission : :class:`.Submission` - - Raises - ------ - :class:`.InvalidEvent` - Raised if the submission is finalized. - - """ - if submission.finalized: - raise InvalidEvent(event, "Cannot apply to a finalized submission") diff --git a/core/events/domain/rule.py b/core/events/domain/rule.py deleted file mode 100644 index d71e090..0000000 --- a/core/events/domain/rule.py +++ /dev/null @@ -1,84 +0,0 @@ -""" -Conditional business logic as data. - -This is here for demonstration purposes only, and is likely to change -substantially in the short term. 
-""" - -from datetime import datetime -from typing import Callable, TypeVar, Optional - -from dataclasses import dataclass, field -from dataclasses import asdict - -from .agent import Agent, System -from .event import Event, event_factory -from .submission import Submission - - -EventRuleType = TypeVar('EventRuleType', bound='EventRule') - - -@dataclass -class RuleCondition: - """Evaluate whether or not the rule applies to an event.""" - - event_type: type - submission_id: Optional[int] = None - extra_condition: Optional[dict] = None - - def __call__(self, submission: Submission, event: Event) -> bool: - """Evaluate whether or not the rule applies to an event.""" - return type(event) is self.event_type and \ - self._callable_from_condition(submission, event) \ - and (self.submission_id is None - or self.submission_id == submission.submission_id) - - # TODO: implement some kind of DSL for evaluating submission state? - @property - def _callable_from_condition(self) -> Callable: - return lambda sub, event: True - - -@dataclass -class RuleConsequence: - """Generate a new event as a result of the rule.""" - - event_type: type - """The type of event to apply when the rule is triggered.""" - event_data: dict - """Data for the event applied when the rule is triggered.""" - - event_creator: Agent = field(default_factory=System) - """The agent responsible for the consequent event.""" - - def __call__(self, submission: Submission, event: Event) -> Event: - """Generate a new event as a result of the rule.""" - data = { # These are effectively defaults. 
- 'creator': self.event_creator, - 'proxy': None, - 'submission_id': submission.submission_id - } - data.update(self.event_data) - data['created'] = datetime.now() - # new_event = event_factory(self.event_type, **data) - new_event = self.event_type(**data) - if new_event.submission_id is None: - new_event.submission_id = submission.submission_id - if new_event.creator is None: - new_event.creator = self.event_creator - return new_event - - -@dataclass -class EventRule: - """Expresses conditional business logic to generate automated events.""" - - creator: Agent - condition: RuleCondition - consequence: RuleConsequence - rule_id: Optional[int] = None - proxy: Optional[Agent] = None - created: datetime = field(default_factory=datetime.now) - applied: bool = False - """Whether or not the rule has already been triggered and applied.""" diff --git a/core/events/domain/submission.py b/core/events/domain/submission.py deleted file mode 100644 index 61b5f34..0000000 --- a/core/events/domain/submission.py +++ /dev/null @@ -1,266 +0,0 @@ -"""Data structures for submissions.""" - -import hashlib -from typing import Optional, Dict, TypeVar, List -from datetime import datetime - -from dataclasses import dataclass, field -from dataclasses import asdict - -from .agent import Agent - - -@dataclass -class Classification: - """An archive/category classification for a :class:`.Submission`.""" - - category: str - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Classification`.""" - return asdict(self) - - -@dataclass -class License: - """An license for distribution of the submission.""" - - uri: str - name: Optional[str] = None - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.License`.""" - return asdict(self) - - -@dataclass -class Author: - """Represents an author of a submission.""" - - order: int = field(default=0) - forename: str = field(default_factory=str) - surname: str = field(default_factory=str) - 
initials: str = field(default_factory=str) - affiliation: str = field(default_factory=str) - email: str = field(default_factory=str) - identifier: Optional[str] = field(default=None) - display: Optional[str] = field(default=None) - """ - Submitter may include a preferred display name for each author. - - If not provided, will be automatically generated from the other fields. - """ - - def __post_init__(self) -> None: - """Auto-generate an identifier, if not provided.""" - if not self.identifier: - self.identifier = self._generate_identifier() - if not self.display: - self.display = self.canonical - - def _generate_identifier(self): - h = hashlib.new('sha1') - h.update(bytes(':'.join([self.forename, self.surname, self.initials, - self.affiliation, self.email]), - encoding='utf-8')) - return h.hexdigest() - - @property - def canonical(self): - """Canonical representation of the author name.""" - name = "%s %s %s" % (self.forename, self.initials, self.surname) - name = name.replace(' ', ' ') - if self.affiliation: - return "%s (%s)" % (name, self.affiliation) - return name - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Author`.""" - return asdict(self) - - -@dataclass -class SubmissionContent: - """Metadata about the submission source package and compiled products.""" - - location: str - format: str - mime_type: str - size: str - checksum: str - identifier: int - - -@dataclass -class SubmissionMetadata: - """Metadata about a :class:`.Submission` instance.""" - - title: Optional[str] = None - abstract: Optional[str] = None - - authors: list = field(default_factory=list) - authors_display: str = field(default_factory=str) - """The canonical arXiv author string.""" - - doi: Optional[str] = None - msc_class: Optional[str] = None - acm_class: Optional[str] = None - report_num: Optional[str] = None - journal_ref: Optional[str] = None - - comments: str = field(default_factory=str) - - def to_dict(self) -> dict: - """Generate dict 
representation of :class:`.SubmissionMetadata`.""" - return asdict(self) - - -@dataclass -class Delegation: - """Delegation of editing privileges to a non-owning :class:`.Agent`.""" - - delegate: Agent - creator: Agent - created: datetime = field(default_factory=datetime.now) - - @property - def delegation_id(self): - """Unique identifer for the delegation instance.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.delegate.agent_identifier, - self.creator.agent_identifier, - self.created.isodate())) - return h.hexdigest() - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Delegation`.""" - data = asdict(self) - data['delegation_id'] = self.delegation_id - return data - - -@dataclass -class Submission: - """Represents an arXiv submission object.""" - - WORKING = 'working' - SUBMITTED = 'submitted' - ON_HOLD = 'hold' - SCHEDULED = 'scheduled' - PUBLISHED = 'published' - ERROR = 'error' - DELETED = 'deleted' - - creator: Agent - owner: Agent - created: datetime - updated: Optional[datetime] = field(default=None) - - source_content: Optional[SubmissionContent] = field(default=None) - compiled_content: List[SubmissionContent] = field(default_factory=list) - - primary_classification: Optional[Classification] = field(default=None) - delegations: Dict[str, Delegation] = field(default_factory=dict) - proxy: Optional[Agent] = field(default=None) - client: Optional[Agent] = field(default=None) - submission_id: Optional[int] = field(default=None) - metadata: SubmissionMetadata = field(default_factory=SubmissionMetadata) - active: bool = field(default=True) - """Actively moving through the submission workflow.""" - - finalized: bool = field(default=False) - """Submitter has indicated submission is ready for publication.""" - - published: bool = field(default=False) - secondary_classification: List[Classification] = \ - field(default_factory=list) - submitter_contact_verified: bool = field(default=False) - submitter_is_author: bool = 
field(default=True) - submitter_accepts_policy: bool = field(default=False) - license: Optional[License] = field(default=None) - status: str = field(default=WORKING) - arxiv_id: Optional[str] = field(default=None) - """The published arXiv paper ID.""" - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Submission`.""" - data = asdict(self) - data.update({ - 'created': self.created.isoformat(), - 'updated': self.updated.isoformat() if self.updated else None, - 'metadata': self.metadata.to_dict(), - 'creator': self.creator.to_dict(), - 'owner': self.owner.to_dict(), - 'proxy': self.proxy.to_dict() if self.proxy else None, - 'client': self.client.to_dict() if self.client else None, - }) - return data - - -@dataclass -class Annotation: - """Auxilliary metadata used by the submission and moderation process.""" - - creator: Agent - submission: Submission - created: datetime - scope: str # TODO: document this. - proxy: Optional[Agent] - - @property - def annotation_type(self): - """Name (str) of the type of annotation.""" - return type(self).__name__ - - @property - def annotation_id(self): - """The unique identifier for an :class:`.Annotation` instance.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.created.isoformat().encode('utf-8'), - self.annotation_type.encode('utf-8'), - self.creator.agent_identifier.encode('utf-8'))) - return h.hexdigest() - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Annotation`.""" - data = asdict(self) - data['annotation_type'] = self.annotation_type - data['annotation_id'] = self.annotation_id - return data - - -@dataclass -class Proposal(Annotation): - """Represents a proposal to apply an event to a submission.""" - - event_type: type - event_data: dict - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Proposal`.""" - return asdict(self) - - -@dataclass -class Comment(Annotation): - """A freeform textual annotation.""" - - 
body: str - - @property - def comment_id(self): - """The unique identifier for a :class:`.Comment` instance.""" - return self.annotation_id - - def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Comment`.""" - data = asdict(self) - data['comment_id'] = self.comment_id - return data - - -@dataclass -class Flag(Annotation): - """Tags used to route submissions based on moderation policies.""" - - pass diff --git a/core/events/domain/tests/__init__.py b/core/events/domain/tests/__init__.py deleted file mode 100644 index 34a4d73..0000000 --- a/core/events/domain/tests/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""Tests for :mod:`events.domain`.""" diff --git a/core/events/domain/tests/test_events.py b/core/events/domain/tests/test_events.py deleted file mode 100644 index 73a6de0..0000000 --- a/core/events/domain/tests/test_events.py +++ /dev/null @@ -1,230 +0,0 @@ -"""Tests for :class:`.Event`s in :mod:`events.domain.event`.""" - -from unittest import TestCase, mock -from datetime import datetime -from arxiv import taxonomy -from events import save -from events.domain import event, agent, submission -from events.exceptions import InvalidEvent - - -class TestSetPrimaryClassification(TestCase): - """Test :class:`event.SetPrimaryClassification`.""" - - def setUp(self): - """Initialize auxiliary data for test cases.""" - self.user = agent.User(12345, 'uuser@cornell.edu') - self.submission = submission.Submission( - submission_id=1, - creator=self.user, - owner=self.user, - created=datetime.now() - ) - - def test_set_primary_with_nonsense(self): - """Category is not from the arXiv taxonomy.""" - e = event.SetPrimaryClassification( - creator=self.user, - submission_id=1, - category="nonsense" - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". 
- - def test_set_primary_with_valid_category(self): - """Category is from the arXiv taxonomy.""" - for category in taxonomy.CATEGORIES.keys(): - e = event.SetPrimaryClassification( - creator=self.user, - submission_id=1, - category=category - ) - try: - e.validate(self.submission) - except InvalidEvent as e: - self.fail("Event should be valid") - - def test_set_primary_already_secondary(self): - """Category is already set as a secondary.""" - classification = submission.Classification('cond-mat.dis-nn') - self.submission.secondary_classification.append(classification) - e = event.SetPrimaryClassification( - creator=self.user, - submission_id=1, - category='cond-mat.dis-nn' - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". - - -class TestAddSecondaryClassification(TestCase): - """Test :class:`event.AddSecondaryClassification`.""" - - def setUp(self): - """Initialize auxiliary data for test cases.""" - self.user = agent.User(12345, 'uuser@cornell.edu') - self.submission = submission.Submission( - submission_id=1, - creator=self.user, - owner=self.user, - created=datetime.now(), - secondary_classification=[] - ) - - def test_add_secondary_with_nonsense(self): - """Category is not from the arXiv taxonomy.""" - e = event.AddSecondaryClassification( - creator=self.user, - submission_id=1, - category="nonsense" - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". 
- - def test_add_secondary_with_valid_category(self): - """Category is from the arXiv taxonomy.""" - for category in taxonomy.CATEGORIES.keys(): - e = event.AddSecondaryClassification( - creator=self.user, - submission_id=1, - category=category - ) - try: - e.validate(self.submission) - except InvalidEvent as e: - self.fail("Event should be valid") - - def test_add_secondary_already_present(self): - """Category is already present on the submission.""" - self.submission.secondary_classification.append( - submission.Classification('cond-mat.dis-nn') - ) - e = event.AddSecondaryClassification( - creator=self.user, - submission_id=1, - category='cond-mat.dis-nn' - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". - - def test_add_secondary_already_primary(self): - """Category is already set as primary.""" - classification = submission.Classification('cond-mat.dis-nn') - self.submission.primary_classification = classification - - e = event.AddSecondaryClassification( - creator=self.user, - submission_id=1, - category='cond-mat.dis-nn' - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". - - -class TestRemoveSecondaryClassification(TestCase): - """Test :class:`event.RemoveSecondaryClassification`.""" - - def setUp(self): - """Initialize auxiliary data for test cases.""" - self.user = agent.User(12345, 'uuser@cornell.edu') - self.submission = submission.Submission( - submission_id=1, - creator=self.user, - owner=self.user, - created=datetime.now(), - secondary_classification=[] - ) - - def test_add_secondary_with_nonsense(self): - """Category is not from the arXiv taxonomy.""" - e = event.RemoveSecondaryClassification( - creator=self.user, - submission_id=1, - category="nonsense" - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". 
- - def test_remove_secondary_with_valid_category(self): - """Category is from the arXiv taxonomy.""" - classification = submission.Classification('cond-mat.dis-nn') - self.submission.secondary_classification.append(classification) - e = event.RemoveSecondaryClassification( - creator=self.user, - submission_id=1, - category='cond-mat.dis-nn' - ) - try: - e.validate(self.submission) - except InvalidEvent as e: - self.fail("Event should be valid") - - def test_remove_secondary_not_present(self): - """Category is not present.""" - e = event.RemoveSecondaryClassification( - creator=self.user, - submission_id=1, - category='cond-mat.dis-nn' - ) - with self.assertRaises(InvalidEvent): - e.validate(self.submission) # "Event should not be valid". - - -class TestUpdateAuthors(TestCase): - """Test :class:`event.UpdateAuthors`.""" - - def setUp(self): - """Initialize auxiliary data for test cases.""" - self.user = agent.User(12345, 'uuser@cornell.edu') - self.submission = submission.Submission( - submission_id=1, - creator=self.user, - owner=self.user, - created=datetime.now() - ) - - def test_canonical_authors_provided(self): - """Data includes canonical author display string.""" - e = event.UpdateAuthors(creator=self.user, - submission_id=1, - authors=[submission.Author()], - authors_display="Foo authors") - try: - e.validate(self.submission) - except Exception as e: - self.fail(str(e), "Data should be valid") - s = e.project(self.submission) - self.assertEqual(s.metadata.authors_display, e.authors_display, - "Authors string should be updated") - - def test_canonical_authors_not_provided(self): - """Data does not include canonical author display string.""" - e = event.UpdateAuthors( - creator=self.user, - submission_id=1, - authors=[ - submission.Author( - forename="Bob", - surname="Paulson", - affiliation="FSU" - ) - ]) - self.assertEqual(e.authors_display, "Bob Paulson (FSU)", - "Display string should be generated automagically") - - try: - e.validate(self.submission) - 
except Exception as e: - self.fail(str(e), "Data should be valid") - s = e.project(self.submission) - self.assertEqual(s.metadata.authors_display, e.authors_display, - "Authors string should be updated") - - def test_canonical_authors_contains_et_al(self): - """Authors display value contains et al.""" - e = event.UpdateAuthors(creator=self.user, - submission_id=1, - authors=[submission.Author()], - authors_display="Foo authors, et al") - with self.assertRaises(InvalidEvent): - e.validate(self.submission) diff --git a/core/events/exceptions.py b/core/events/exceptions.py deleted file mode 100644 index 1b7f7e0..0000000 --- a/core/events/exceptions.py +++ /dev/null @@ -1,33 +0,0 @@ -"""Exceptions raised during event handling.""" - -from typing import TypeVar, List - -EventType = TypeVar('EventType', bound='core.events.domain.event.Event') - - -class InvalidEvent(ValueError): - """Raised when an invalid event is encountered.""" - - def __init__(self, event: EventType, extra: str='') -> None: - """Use the :class:`.Event` to build an error message.""" - self.event: EventType = event - self.message = f"Invalid event: {event.event_type} ({event.event_id}): {extra}" - super(InvalidEvent, self).__init__(self.message) - -class InvalidStack(ValueError): - """Raised when an invalid event is encountered.""" - - def __init__(self, event_exceptions: List[InvalidEvent], extra: str='') -> None: - """Use the :class:`.Event` to build an error message.""" - self.event_exceptions: List[InvalidEvent] = event_exceptions - self.message = 'Invalid Stack:' - for ex in self.event_exceptions: - self.message += f"\n\t{ex.message}" - super(InvalidStack, self).__init__(self.message) - -class NoSuchSubmission(Exception): - """An operation was performed on/for a submission that does not exist.""" - - -class SaveError(RuntimeError): - """Failed to persist event state.""" diff --git a/core/events/services/__init__.py b/core/events/services/__init__.py deleted file mode 100644 index f79984d..0000000 
--- a/core/events/services/__init__.py +++ /dev/null @@ -1 +0,0 @@ -"""External service integrations.""" diff --git a/core/events/services/classic/__init__.py b/core/events/services/classic/__init__.py deleted file mode 100644 index f29acc5..0000000 --- a/core/events/services/classic/__init__.py +++ /dev/null @@ -1,311 +0,0 @@ -""" -Integration with the classic database to persist events and submission state. - -As part of the classic renewal strategy, development of new submission -interfaces must maintain data interoperability with classic components. This -service module must therefore do two main things: - -1. Store and provide access to event data generated during the submission - process, and -2. Keep the classic database tables up to date so that "downstream" components - can continue to operate. Since classic components work directly on - submission tables, persisting events and resulting submission state must - occur in the same transaction. - -An additional challenge is representing changes to submission state made by -classic components, since those changes will be made directly to submission -tables and not involve event-generation. See :func:`get_submission` for -details. - -ORM representations of the classic database tables involved in submission -are located in :mod:`.classic.models`. An additional model, :class:`.DBEvent`, -is defined in the current module. 
-""" - -from typing import List, Optional, Generator, Dict, Union, Tuple -from contextlib import contextmanager - -from flask import Flask -from sqlalchemy import Column, String, DateTime, ForeignKey, \ - create_engine -from sqlalchemy.ext.indexable import index_property -from sqlalchemy.orm import relationship -from sqlalchemy.engine import Engine -from sqlalchemy.ext.declarative import declarative_base -from sqlalchemy.orm import sessionmaker -from sqlalchemy.orm.session import Session - -from arxiv.base import logging -from events.domain.event import Event, event_factory -from events.domain.submission import License, Submission -from events.domain.agent import User, Client, Agent -from . import models, util -from .models import Base -from .exceptions import NoSuchSubmission, CommitFailed, ClassicBaseException -from arxiv.base.globals import get_application_config, get_application_global - -logger = logging.getLogger(__name__) - - -class DBEvent(Base): # type: ignore - """Database representation of an :class:`.Event`.""" - - __tablename__ = 'event' - - event_id = Column(String(40), primary_key=True) - event_type = Column(String(255)) - proxy = Column(util.FriendlyJSON) - proxy_id = index_property('proxy', 'agent_identifier') - client = Column(util.FriendlyJSON) - client_id = index_property('client', 'agent_identifier') - - creator = Column(util.FriendlyJSON) - creator_id = index_property('creator', 'agent_identifier') - - created = Column(DateTime) - data = Column(util.FriendlyJSON) - submission_id = Column( - ForeignKey('arXiv_submissions.submission_id'), - index=True - ) - - submission = relationship("Submission") - - def to_event(self) -> Event: - """ - Instantiate an :class:`.Event` using event data from this instance. 
- - Returns - ------- - :class:`.Event` - - """ - _skip = ['creator', 'proxy', 'client', 'submission_id', 'created', - 'event_type'] - data = { - key: value for key, value in self.data.items() - if key not in _skip - } - data['committed'] = True, # Since we're loading from the DB. - return event_factory( - self.event_type, - creator=Agent.from_dict(self.creator), - proxy=Agent.from_dict(self.proxy) if self.proxy else None, - client=Agent.from_dict(self.client) if self.client else None, - submission_id=self.submission_id, - created=self.created, - **data - ) - - -@contextmanager -def transaction() -> Generator: - """Context manager for database transaction.""" - session = current_session() - try: - yield session - session.commit() - except ClassicBaseException as e: - logger.debug('Commit failed, rolling back: %s', str(e)) - session.rollback() - raise # Propagate exceptions raised from this module. - except Exception as e: - logger.debug('Commit failed, rolling back: %s', str(e)) - session.rollback() - raise CommitFailed('Failed to commit transaction') from e - - -def get_licenses() -> List[License]: - """Get a list of :class:`.License`s available for new submissions.""" - license_data = current_session().query(models.License) \ - .filter(models.License.active == '1') - return [License(uri=row.name, name=row.label) for row in license_data] - - -def get_events(submission_id: int) -> List[Event]: - """ - Load events from the classic database. - - Parameters - ---------- - submission_id : int - - Returns - ------- - list - Items are :class:`.Event` instances loaded from the class DB. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised when there are no events for the provided submission ID. - - """ - with transaction() as session: - event_data = session.query(DBEvent) \ - .filter(DBEvent.submission_id == submission_id) \ - .order_by(DBEvent.created) - if not event_data: # No events, no dice. 
- raise NoSuchSubmission(f'Submission {submission_id} not found') - return [datum.to_event() for datum in event_data] - - -def get_submission(submission_id: int) -> Tuple[Submission, List[Event]]: - """ - Get the current state of a :class:`.Submission` from the database. - - In the medium term, services that use this package will need to - play well with legacy services that integrate with the classic - database. For example, the moderation system does not use the event - model implemented here, and will therefore cause direct changes to the - submission tables that must be reflected in our representation of the - submission. - - Until those legacy components are replaced, we will need to load both the - event stack and the current DB state of the submission, and use the DB - state to patch fields that may have changed outside the purview of the - event model. - - Parameters - ---------- - submission_id : int - - Returns - ------- - :class:`.Submission` - """ - # Load and play events. Eventually, this is the only query we will make - # against the database. - events = get_events(submission_id) - submission = None # We assume that the first event is a creation. - for ev in events: - submission = ev.apply(submission) if submission else ev.apply() - - with transaction() as session: - # Load the current db state of the submission, and patch. Once we have - # retired legacy components that do not follow the event model, this - # step should be removed. - data = session.query(models.Submission).get(submission_id) - if data is None: - raise NoSuchSubmission(f'Submission {submission_id} not found') - return data.patch(submission), events - - -def store_events(*events: Event, submission: Submission) -> Submission: - """ - Store events in the database. - - Parameters - ---------- - events : list - A list of (presumably new) :class:`.Event` instances to be persisted. - Events that have already been committed will not be committed again, - so it's safe to include them here. 
- submission : :class:`.Submission` - Current state of the submission (after events have been applied). - - Returns - ------- - :class:`.Submission` - Stored submission, updated with current submission ID. - """ - # Commit new events for a single submission in a transaction. - with transaction() as session: - # We need a reference to this row for the event rows, so we add it - # first. - if submission.submission_id is None: - db_submission = models.Submission() - else: - db_submission = session.query(models.Submission)\ - .get(submission.submission_id) - if db_submission is None: - raise RuntimeError("Submission ID is set, but can't find data") - - # Update the submission state from the Submission domain object. - db_submission.update_from_submission(submission) - session.add(db_submission) - - for event in events: - if event.committed: # Don't create duplicate event entries. - continue - - if event.committed: - raise RuntimeError('Event is already committed') - db_event = DBEvent( - event_type=event.event_type, - event_id=event.event_id, - data=event.to_dict(), - created=event.created, - creator=event.creator.to_dict(), - proxy=event.proxy.to_dict() if event.proxy else None, - submission_id=event.submission_id - ) - session.add(db_event) - db_event.submission = db_submission # Will be updated on commit. - event.committed = True - submission.submission_id = db_submission.submission_id - return submission - - -def init_app(app: object = None) -> None: - """Set default configuration parameters for an application instance.""" - config = get_application_config(app) - config.setdefault('CLASSIC_DATABASE_URI', 'sqlite://') - - -def get_engine(app: object = None) -> Engine: - """Get a new :class:`.Engine` for the classic database.""" - config = get_application_config(app) - database_uri = config.get('CLASSIC_DATABASE_URI', 'sqlite://') - return create_engine(database_uri) - - -# TODO: consider making this private. 
-def get_session(app: object = None) -> Session: - """Get a new :class:`.Session` for the classic database.""" - engine = current_engine() - return sessionmaker(bind=engine)() - - -def current_engine() -> Engine: - """Get/create :class:`.Engine` for this context.""" - g = get_application_global() - if not g: - return get_engine() - if 'classic_engine' not in g: - g.classic_engine = get_engine() # type: ignore - return g.classic_engine # type: ignore - - -def current_session() -> Session: - """Get/create :class:`.Session` for this context.""" - g = get_application_global() - if not g: - return get_session() - if 'classic' not in g: - g.classic = get_session() # type: ignore - return g.classic # type: ignore - - -def create_all() -> None: - """Create all tables in the database.""" - Base.metadata.create_all(current_engine()) - - -def drop_all() -> None: - """Drop all tables in the database.""" - Base.metadata.drop_all(current_engine()) - - -# # TODO: find a better way! -# def _declare_event() -> type: -# """ -# Define DBEvent model. -# -# This is deferred until runtime so that we can inject an alternate model -# for testing. This is less than ideal, but (so far) appears to be the only -# way to effectively replace column data types, which we need in order to -# use JSON columns with SQLite. 
-# """ -# -# return DBEvent diff --git a/core/events/services/classic/exceptions.py b/core/events/services/classic/exceptions.py deleted file mode 100644 index 996be52..0000000 --- a/core/events/services/classic/exceptions.py +++ /dev/null @@ -1,13 +0,0 @@ -"""Exceptions raised by :mod:`events.services.classic`.""" - - -class ClassicBaseException(RuntimeError): - """Base for classic service exceptions.""" - - -class NoSuchSubmission(ClassicBaseException): - """A request was made for a submission that does not exist.""" - - -class CommitFailed(ClassicBaseException): - """Raised when there was a problem committing changes to the database.""" diff --git a/core/events/services/classic/tests.py b/core/events/services/classic/tests.py deleted file mode 100644 index 8e96c6a..0000000 --- a/core/events/services/classic/tests.py +++ /dev/null @@ -1,322 +0,0 @@ -""" -Integration tests for the classic database service. - -These tests assume that SQLAlchemy's MySQL backend is implemented correctly: -instead of using a live MySQL database, they use an in-memory SQLite database. -This is mostly fine (they are intended to be more-or-less swappable). The one -iffy bit is the JSON datatype, which is not available by default in the SQLite -backend, and so we inject a simple one here. End to end tests with a live MySQL -database will provide more confidence in this area. 
-""" - -from unittest import TestCase, mock -import os -from datetime import datetime -from contextlib import contextmanager -import json - -from flask import Flask - -from events.domain.agent import User -from events.domain.submission import License, Submission, Author -from events.domain.event import CreateSubmission, UpdateMetadata, \ - FinalizeSubmission, SetPrimaryClassification, AddSecondaryClassification, \ - SelectLicense, SetPrimaryClassification, AcceptPolicy, \ - VerifyContactInformation -from events.domain.agent import User -from events.services import classic - - -@contextmanager -def in_memory_db(): - """Provide an in-memory sqlite database for testing purposes.""" - app = Flask('foo') - app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite://' - app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False - - with app.app_context(): - classic.init_app(app) - classic.create_all() - try: - yield classic.current_session() - except Exception: - raise - finally: - classic.drop_all() - - -class TestGetLicenses(TestCase): - """Test :func:`.classic.get_licenses`.""" - - def test_get_all_active_licenses(self): - """Return a :class:`.License` for each active license in the db.""" - # mock_util.json_factory.return_value = SQLiteJSON - - with in_memory_db() as session: - session.add(classic.models.License( - name="http://arxiv.org/licenses/assumed-1991-2003", - sequence=9, - label="Assumed arXiv.org perpetual, non-exclusive license to", - active=0 - )) - session.add(classic.models.License( - name="http://creativecommons.org/licenses/publicdomain/", - sequence=4, - label="Creative Commons Public Domain Declaration", - active=1 - )) - session.commit() - licenses = classic.get_licenses() - - self.assertEqual(len(licenses), 1, - "Only the active license should be returned.") - self.assertIsInstance(licenses[0], License, - "Should return License instances.") - self.assertEqual(licenses[0].uri, - "http://creativecommons.org/licenses/publicdomain/", - "Should use name column to 
populate License.uri") - self.assertEqual(licenses[0].name, - "Creative Commons Public Domain Declaration", - "Should use label column to populate License.name") - - -class TestStoreEvents(TestCase): - """Test :func:`.classic.store_events`.""" - - def test_store_event(self): - """Store a single event.""" - with in_memory_db() as session: - user = User(12345, 'joe@joe.joe') - ev = CreateSubmission(creator=user) - submission = ev.apply() - submission = classic.store_events(ev, submission=submission) - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - - self.assertEqual(db_submission.submission_id, submission.submission_id, - "The submission should be updated with the PK id.") - self.assertEqual(db_submission.submitter_id, - submission.creator.native_id, - "The native ID of the creator should be used") - self.assertEqual(db_submission.status, db_submission.NOT_SUBMITTED, - "Submission in database should be in status 0 (not" - " submitted) by default.") - - def test_store_events_with_metadata(self): - """Store events and attendant submission with metadata.""" - metadata = { - 'title': 'foo title', - 'abstract': 'very abstract', - 'comments': 'indeed', - 'msc_class': 'foo msc', - 'acm_class': 'computer-y', - 'doi': '10.01234/5678', - 'journal_ref': 'Nature 1: 1', - 'authors': [Author(order=0, forename='Joe', surname='Bloggs')] - } - with in_memory_db() as session: - user = User(12345, 'joe@joe.joe') - ev = CreateSubmission(creator=user) - ev2 = UpdateMetadata(creator=user, - metadata=list(metadata.items())) - - submission = ev.apply() - submission = ev2.apply(submission) - submission = classic.store_events(ev, ev2, submission=submission) - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - - db_events = session.query(classic.DBEvent).all() - - for key, value in metadata.items(): - if key == 'authors': - continue - self.assertEqual(getattr(db_submission, key), value, - f"The value of 
{key} should be {value}") - self.assertEqual(db_submission.authors, - submission.metadata.authors_display, - "The canonical author string should be used to" - " update the submission in the database.") - - self.assertEqual(len(db_events), 2, "Two events should be stored") - for db_event in db_events: - self.assertEqual(db_event.submission_id, submission.submission_id, - "The submission id should be set") - - def test_store_events_with_finalized_submission(self): - """Store events and a finalized submission.""" - with in_memory_db() as session: - user = User(12345, 'joe@joe.joe') - ev = CreateSubmission(creator=user) - ev2 = FinalizeSubmission(creator=user) - submission = ev.apply() - submission = ev2.apply(submission) - submission = classic.store_events(ev, ev2, submission=submission) - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - db_events = session.query(classic.DBEvent).all() - - self.assertEqual(db_submission.submission_id, submission.submission_id, - "The submission should be updated with the PK id.") - self.assertEqual(len(db_events), 2, "Two events should be stored") - for db_event in db_events: - self.assertEqual(db_event.submission_id, submission.submission_id, - "The submission id should be set") - - def test_store_events_with_classification(self): - """Store events including classification.""" - user = User(12345, 'joe@joe.joe') - ev = CreateSubmission(creator=user) - ev2 = SetPrimaryClassification(creator=user, - category='physics.soc-ph') - ev3 = AddSecondaryClassification(creator=user, - category='physics.acc-ph') - submission = ev.apply() - submission = ev2.apply(submission) - submission = ev3.apply(submission) - - with in_memory_db() as session: - submission = classic.store_events(ev, ev2, ev3, - submission=submission) - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - db_events = session.query(classic.DBEvent).all() - - 
self.assertEqual(db_submission.submission_id, submission.submission_id, - "The submission should be updated with the PK id.") - self.assertEqual(len(db_events), 3, "Three events should be stored") - for db_event in db_events: - self.assertEqual(db_event.submission_id, submission.submission_id, - "The submission id should be set") - self.assertEqual(len(db_submission.categories), 2, - "Two category relations should be set") - self.assertEqual(db_submission.primary_classification.category, - submission.primary_classification.category, - "Primary classification should be set.") - - -class TestGetSubmission(TestCase): - """Test :func:`.classic.get_submission`.""" - - def test_get_submission_that_does_not_exist(self): - """Test that an exception is raised when submission doesn't exist.""" - with in_memory_db(): - with self.assertRaises(classic.exceptions.NoSuchSubmission): - classic.get_submission(1) - - def test_get_submission_with_publish(self): - """Test that publication state is reflected in submission data.""" - user = User(12345, 'joe@joe.joe') - events = [ - CreateSubmission(creator=user), - UpdateMetadata(creator=user, metadata=[ - ('title', 'Foo title'), - ('abstract', 'Indeed'), - ('authors', [ - Author(order=0, forename='Joe', surname='Bloggs', - email='joe@blo.ggs'), - Author(order=1, forename='Jane', surname='Doe', - email='j@doe.com'), - ]) - ]), - SelectLicense(creator=user, license_uri='http://foo.org/1.0/', - license_name='Foo zero 1.0'), - SetPrimaryClassification(creator=user, category='cs.DL'), - AcceptPolicy(creator=user), - VerifyContactInformation(creator=user), - FinalizeSubmission(creator=user) - ] - submission = None - for ev in events: - submission = ev.apply(submission) if submission else ev.apply() - - with in_memory_db() as session: - # User creates and finalizes submission. - submission = classic.store_events(*events, submission=submission) - ident = submission.submission_id - - # Moderation happens, things change outside the event model. 
- db_submission = session.query(classic.models.Submission).get(ident) - - # Published! - db_submission.status = db_submission.PUBLISHED - db_document = classic.models.Document(paper_id='1234.5678') - db_submission.document = db_document - session.add(db_submission) - session.add(db_document) - session.commit() - - # Now get the submission. - submission_loaded, _ = classic.get_submission(ident) - - self.assertEqual(submission.metadata.title, - submission_loaded.metadata.title, - "Event-derived metadata should be preserved.") - self.assertEqual(submission_loaded.arxiv_id, "1234.5678", - "arXiv paper ID should be set") - self.assertEqual(submission_loaded.status, Submission.PUBLISHED, - "Submission status should reflect publish action") - - def test_get_submission_with_hold_and_reclass(self): - """Test changes made externally are reflected in submission data.""" - user = User(12345, 'joe@joe.joe') - events = [ - CreateSubmission(creator=user), - UpdateMetadata(creator=user, metadata=[ - ('title', 'Foo title'), - ('abstract', 'Indeed'), - ('authors', [ - Author(order=0, forename='Joe', surname='Bloggs', - email='joe@blo.ggs'), - Author(order=1, forename='Jane', surname='Doe', - email='j@doe.com'), - ]) - ]), - SelectLicense(creator=user, license_uri='http://foo.org/1.0/', - license_name='Foo zero 1.0'), - SetPrimaryClassification(creator=user, category='cs.DL'), - AcceptPolicy(creator=user), - VerifyContactInformation(creator=user), - FinalizeSubmission(creator=user) - ] - submission = None - for ev in events: - submission = ev.apply(submission) if submission else ev.apply() - - with in_memory_db() as session: - # User creates and finalizes submission. - submission = classic.store_events(*events, submission=submission) - ident = submission.submission_id - - # Moderation happens, things change outside the event model. - db_submission = session.query(classic.models.Submission).get(ident) - - # Reclassification! 
- session.delete(db_submission.primary_classification) - session.add(classic.models.SubmissionCategory( - submission_id=ident, category='cs.IR', is_primary=1 - )) - - # On hold! - db_submission.status = db_submission.ON_HOLD - session.add(db_submission) - session.commit() - - # Now get the submission. - submission_loaded, _ = classic.get_submission(ident) - - self.assertEqual(submission.metadata.title, - submission_loaded.metadata.title, - "Event-derived metadata should be preserved.") - self.assertEqual(submission_loaded.primary_classification.category, - "cs.IR", - "Primary classification should reflect the" - " reclassification that occurred outside the purview" - " of the event model.") - self.assertEqual(submission_loaded.status, Submission.ON_HOLD, - "Submission status should reflect hold action" - " performed outside the purview of the event model.") diff --git a/core/events/services/classic/util.py b/core/events/services/classic/util.py deleted file mode 100644 index 98e5426..0000000 --- a/core/events/services/classic/util.py +++ /dev/null @@ -1,28 +0,0 @@ -"""Utility classes and functions for :mod:`events.services.classic`.""" - -import json -from typing import Optional -import sqlalchemy.types as types - - -class SQLiteJSON(types.TypeDecorator): - """A SQLite-friendly JSON data type.""" - - impl = types.TEXT - - def process_bind_param(self, value: Optional[dict], dialect: str) -> str: - """Serialize a dict to JSON.""" - if value is not None: - value = json.dumps(value) - return value - - def process_result_value(self, value: str, dialect: str) -> Optional[dict]: - """Deserialize JSON content to a dict.""" - if value is not None: - value = json.loads(value) - return value - - -# SQLite does not support JSON, so we extend JSON to use our custom data type -# as a variant for the 'sqlite' dialect. 
-FriendlyJSON = types.JSON().with_variant(SQLiteJSON, 'sqlite') diff --git a/core/events/services/notification.py b/core/events/services/notification.py deleted file mode 100644 index 42b4581..0000000 --- a/core/events/services/notification.py +++ /dev/null @@ -1,25 +0,0 @@ -""" -The notication service provides integration with the notification broker. - -Brokered notifications provide an event-based integration strategy for other -arXiv submission and moderation services, including the webhook service (to -notify external-to-arXiv services). -""" - -from events.domain import Event, Submission - - -def emit(event: Event, submission: Submission) -> None: - """ - Emit an event to the notification broker. - - Parameters - ---------- - event : :class:`.Event` - submission : :class:`.Submission` - - Raises - ------ - NotificationFailed - """ - # TODO: implement me! diff --git a/core/events/tests/test_classic_integration.py b/core/events/tests/test_classic_integration.py deleted file mode 100644 index 8614cd7..0000000 --- a/core/events/tests/test_classic_integration.py +++ /dev/null @@ -1,541 +0,0 @@ -""" -Tests for integration with the classic system. - -Provides test cases for the new events model's ability to replicate the classic -model. The function `TestClassicUIWorkflow.test_classic_workflow()` provides -keyword arguments to pass different types of data through the workflow. - -TODO: Presently, `test_classic_workflow` expects `core.domain` objects. That -should change to instantiate each object at runtime for database imports. 
-""" - -from unittest import TestCase, mock -from datetime import datetime -import tempfile -from contextlib import contextmanager - -from flask import Flask - -import events -from events.services import classic - - -@contextmanager -def in_memory_db(): - """Provide an in-memory sqlite database for testing purposes.""" - app = Flask('foo') - app.config['CLASSIC_DATABASE_URI'] = 'sqlite://' - - with app.app_context(): - classic.init_app(app) - classic.create_all() - try: - yield classic.current_session() - except Exception: - raise - finally: - classic.drop_all() - - -class TestClassicUIWorkflow(TestCase): - """Replicate the classic submission UI workflow.""" - - def setUp(self): - """An arXiv user is submitting a new paper.""" - self.submitter = events.domain.User(1234, email='j.user@somewhere.edu', - forename='Jane', surname='User') - self.unicode_submitter = events.domain.User(12345, email='j.user@somewhere.edu', - forename='大', surname='用户') - - def test_classic_workflow(self, submitter=None, metadata=None, authors=None): - """Submitter proceeds through workflow in a linear fashion.""" - - # Instantiate objects that have not yet been instantiated or use defaults. - if submitter is None: - submitter = self.submitter - - if metadata is None: - metadata = [ - ('title', 'Foo title'), - ('abstract', "One morning, as Gregor Samsa was waking up..."), - ('comments', '5 pages, 2 turtle doves'), - ('report_num', 'asdf1234'), - ('doi', '10.01234/56789'), - ('journal_ref', 'Foo Rev 1, 2 (1903)') - ] - - - # TODO: Process data in dictionary form to events.Author objects. - if authors is None: - authors = [events.Author(order=0, - forename='Bob', - surname='Paulson', - email='Robert.Paulson@nowhere.edu', - affiliation='Fight Club' - )] - - with in_memory_db() as session: - # Submitter clicks on 'Start new submission' in the user dashboard. 
- submission, stack = events.save( - events.CreateSubmission(creator=submitter) - ) - self.assertIsNotNone(submission.submission_id, - "A submission ID is assigned") - self.assertEqual(len(stack), 1, "A single command is executed.") - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - self.assertEqual(db_submission.submission_id, - submission.submission_id, - "A row is added to the submission table") - self.assertEqual(db_submission.submitter_id, - submitter.native_id, - "Submitter ID set on submission") - self.assertEqual(db_submission.submitter_email, - submitter.email, - "Submitter email set on submission") - self.assertEqual(db_submission.submitter_name, submitter.name, - "Submitter name set on submission") - self.assertEqual(db_submission.created, submission.created, - "Creation datetime set correctly") - - # TODO: What else to check here? - - # /start: Submitter completes the start submission page. - license_uri = 'http://creativecommons.org/publicdomain/zero/1.0/' - submission, stack = events.save( - events.VerifyContactInformation(creator=submitter), - events.AssertAuthorship( - creator=submitter, - submitter_is_author=True - ), - events.SelectLicense( - creator=submitter, - license_uri=license_uri, - license_name='CC0 1.0' - ), - events.AcceptPolicy(creator=submitter), - events.SetPrimaryClassification( - creator=submitter, - category='cs.DL' - ), - submission_id=submission.submission_id - ) - self.assertEqual(len(stack), 6, - "Six commands have been executed in total.") - - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - self.assertEqual(db_submission.userinfo, 1, - "Contact verification set correctly in database.") - self.assertEqual(db_submission.is_author, 1, - "Authorship status set correctly in database.") - self.assertEqual(db_submission.license, license_uri, - "License set correctly in database.") - self.assertEqual(db_submission.agree_policy, 1, - "Policy 
acceptance set correctly in database.") - self.assertEqual(len(db_submission.categories), 1, - "A single category is associated in the database") - self.assertEqual(db_submission.categories[0].is_primary, 1, - "Primary category is set correct in the database") - self.assertEqual(db_submission.categories[0].category, 'cs.DL', - "Primary category is set correct in the database") - - # /addfiles: Submitter has uploaded files to the file management - # service, and verified that they compile. Now they associate the - # content package with the submission. - submission, stack = events.save( - events.AttachSourceContent( - creator=submitter, - location="https://submit.arxiv.org/upload/123", - checksum="a9s9k342900skks03330029k", - format='tex', - mime_type="application/zip", - identifier=123, - size=593992 - ), - submission_id=submission.submission_id - ) - - self.assertEqual(len(stack), 7, - "Seven commands have been executed in total.") - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - self.assertEqual(db_submission.must_process, 0, - "Processing status is set correctly in database") - self.assertEqual(db_submission.source_size, 593992, - "Source package size set correctly in database") - self.assertEqual(db_submission.source_format, 'tex', - "Source format set correctly in database") - - # /metadata: Submitter adds metadata to their submission, including - # authors. In this package, we model authors in more detail than - # in the classic system, but we should preserve the canonical - # format in the db for legacy components' sake. 
- submission, stack = events.save( - events.UpdateMetadata( - creator=submitter, - metadata=metadata - ), - events.UpdateAuthors( - creator=submitter, - authors=authors - ), - submission_id=submission.submission_id - ) - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - self.assertEqual(db_submission.title, dict(metadata)['title'], - "Title updated as expected in database") - self.assertEqual(db_submission.abstract, - dict(metadata)['abstract'], - "Abstract updated as expected in database") - self.assertEqual(db_submission.comments, - dict(metadata)['comments'], - "Comments updated as expected in database") - self.assertEqual(db_submission.report_num, - dict(metadata)['report_num'], - "Report number updated as expected in database") - self.assertEqual(db_submission.doi, dict(metadata)['doi'], - "DOI updated as expected in database") - self.assertEqual(db_submission.journal_ref, - dict(metadata)['journal_ref'], - "Journal ref updated as expected in database") - - author_str = ';'.join([f"{author.forename} {author.surname} ({author.affiliation})" - for author in authors]) - self.assertEqual(db_submission.authors, - author_str, - "Authors updated in canonical format in database") - - self.assertEqual(len(stack), 9, - "Nine commands have been executed in total.") - - # /preview: Submitter adds a secondary classification. 
- submission, stack = events.save( - events.AddSecondaryClassification( - creator=submitter, - category='cs.IR' - ), - submission_id=submission.submission_id - ) - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - - self.assertEqual(len(db_submission.categories), 2, - "A secondary category is added in the database") - secondaries = [ - db_cat for db_cat in db_submission.categories - if db_cat.is_primary == 0 - ] - self.assertEqual(len(secondaries), 1, - "A secondary category is added in the database") - self.assertEqual(secondaries[0].category, 'cs.IR', - "A secondary category is added in the database") - self.assertEqual(len(stack), 10, - "Ten commands have been executed in total.") - - # /preview: Submitter finalizes submission. - finalize = events.FinalizeSubmission(creator=submitter) - submission, stack = events.save( - finalize, submission_id=submission.submission_id - ) - db_submission = session.query(classic.models.Submission)\ - .get(submission.submission_id) - - self.assertEqual(db_submission.status, db_submission.SUBMITTED, - "Submission status set correctly in database") - self.assertEqual(db_submission.submit_time, finalize.created, - "Submit time is set.") - self.assertEqual(len(stack), 11, - "Eleven commands have been executed in total.") - - def test_unicode_submitter(self): - """Submitter proceeds through workflow in a linear fashion.""" - submitter = self.unicode_submitter - metadata = [ - ('title', '优秀的称号'), - ('abstract', "当我有一天正在上学的时候"), - ('comments', '5页2龟鸠'), - ('report_num', 'asdf1234'), - ('doi', '10.01234/56789'), - ('journal_ref', 'Foo Rev 1, 2 (1903)') - ] - authors = [events.Author( - order=0, - forename='惊人', - surname='用户', - email='amazing.user@nowhere.edu', - affiliation='Fight Club' - )] - - self.test_classic_workflow( - submitter=submitter, metadata=metadata, authors=authors) - - def test_texism_titles(self): - """Submitter proceeds through workflow in a linear fashion.""" - metadata = [ 
- ('title', 'Revisiting $E = mc^2$'), - ('abstract', "$E = mc^2$ is a foundational concept in physics"), - ('comments', '5 pages, 2 turtle doves'), - ('report_num', 'asdf1234'), - ('doi', '10.01234/56789'), - ('journal_ref', 'Foo Rev 1, 2 (1903)') - ] - - self.test_classic_workflow(metadata=metadata) - - -class TestPublicationIntegration(TestCase): - """ - Test integration with the classic database concerning publication. - - Since the publication process continues to run outside of the event model - in the short term, we need to be certain that publication-related changes - are represented accurately in this project. - """ - - @classmethod - def setUpClass(cls): - """Instantiate an app for use with a SQLite database.""" - _, db = tempfile.mkstemp(suffix='.sqlite') - cls.app = Flask('foo') - cls.app.config['CLASSIC_DATABASE_URI'] = f'sqlite:///{db}' - - with cls.app.app_context(): - classic.init_app(cls.app) - - def setUp(self): - """An arXiv user is submitting a new paper.""" - self.submitter = events.domain.User(1234, email='j.user@somewhere.edu', - forename='Jane', surname='User') - - # Create and finalize a new submission. 
- cc0 = 'http://creativecommons.org/publicdomain/zero/1.0/' - with self.app.app_context(): - classic.create_all() - self.submission, _ = events.save( - events.CreateSubmission(creator=self.submitter), - events.VerifyContactInformation(creator=self.submitter), - events.AssertAuthorship( - creator=self.submitter, - submitter_is_author=True - ), - events.SelectLicense( - creator=self.submitter, - license_uri=cc0, - license_name='CC0 1.0' - ), - events.AcceptPolicy(creator=self.submitter), - events.SetPrimaryClassification( - creator=self.submitter, - category='cs.DL' - ), - events.AttachSourceContent( - creator=self.submitter, - location="https://submit.arxiv.org/upload/123", - checksum="a9s9k342900skks03330029k", - format='tex', - mime_type="application/zip", - identifier=123, - size=593992 - ), - events.UpdateMetadata( - creator=self.submitter, - metadata=[ - ('title', 'Foo title'), - ('abstract', "One morning, as Gregor Samsa was..."), - ('comments', '5 pages, 2 turtle doves'), - ('report_num', 'asdf1234'), - ('doi', '10.01234/56789'), - ('journal_ref', 'Foo Rev 1, 2 (1903)') - ] - ), - events.UpdateAuthors( - creator=self.submitter, - authors=[events.Author( - order=0, - forename='Bob', - surname='Paulson', - email='Robert.Paulson@nowhere.edu', - affiliation='Fight Club' - )] - ), - events.FinalizeSubmission(creator=self.submitter) - ) - - def tearDown(self): - """Clear the database after each test.""" - with self.app.app_context(): - classic.drop_all() - - def test_publication_status_is_reflected(self): - """The submission has been published/announced.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. 
- db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.PUBLISHED - dated = (datetime.now() - datetime.utcfromtimestamp(0)) - primary = self.submission.primary_classification.category - db_submission.document = classic.models.Document( - document_id=1, - paper_id='1901.00123', - title=self.submission.metadata.title, - authors=self.submission.metadata.authors_display, - dated=dated.total_seconds(), - primary_subject_class=primary, - created=datetime.now(), - submitter_email=self.submission.creator.email, - submitter_id=self.submission.creator.native_id - ) - session.add(db_submission) - session.commit() - - # Submission state should reflect publication status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.PUBLISHED, - "Submission should have published status.") - self.assertEqual(submission.arxiv_id, "1901.00123", - "arXiv paper ID should be set") - self.assertFalse(submission.active, - "Published submission should no longer be active") - - def test_publication_status_is_reflected_after_files_expire(self): - """The submission has been published/announced, and files expired.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. 
- db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.DELETED_PUBLISHED - dated = (datetime.now() - datetime.utcfromtimestamp(0)) - primary = self.submission.primary_classification.category - db_submission.document = classic.models.Document( - document_id=1, - paper_id='1901.00123', - title=self.submission.metadata.title, - authors=self.submission.metadata.authors_display, - dated=dated.total_seconds(), - primary_subject_class=primary, - created=datetime.now(), - submitter_email=self.submission.creator.email, - submitter_id=self.submission.creator.native_id - ) - session.add(db_submission) - session.commit() - - # Submission state should reflect publication status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.PUBLISHED, - "Submission should have published status.") - self.assertEqual(submission.arxiv_id, "1901.00123", - "arXiv paper ID should be set") - self.assertFalse(submission.active, - "Published submission should no longer be active") - - def test_scheduled_status_is_reflected(self): - """The submission has been scheduled for publication today.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. - db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.PROCESSING - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. 
- submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.SCHEDULED, - "Submission should have scheduled status.") - - def test_scheduled_status_is_reflected_processing_submission(self): - """The submission has been scheduled for publication today.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. - db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.PROCESSING_SUBMISSION - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.SCHEDULED, - "Submission should have scheduled status.") - - def test_scheduled_status_is_reflected_prior_to_announcement(self): - """The submission is being published; not yet announced.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. - db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.NEEDS_EMAIL - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.SCHEDULED, - "Submission should have scheduled status.") - - def test_scheduled_tomorrow_status_is_reflected(self): - """The submission has been scheduled for publication tomorrow.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. 
- db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.NEXT_PUBLISH_DAY - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.SCHEDULED, - "Submission should be scheduled for tomorrow.") - - def test_publication_failed(self): - """The submission was not published successfully.""" - with self.app.app_context(): - session = classic.current_session() - - # Publication agent publishes the paper. - db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = db_submission.ERROR_STATE - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.ERROR, - "Submission should have error status.") - - def test_deleted(self): - """The submission was deleted.""" - with self.app.app_context(): - session = classic.current_session() - - for classic_status in classic.models.Submission.DELETED: - # Publication agent publishes the paper. - db_submission = session.query(classic.models.Submission)\ - .get(self.submission.submission_id) - db_submission.status = classic_status - session.add(db_submission) - session.commit() - - # Submission state should reflect scheduled status. - submission, _ = events.load(self.submission.submission_id) - self.assertEqual(submission.status, submission.DELETED, - "Submission should have deleted status.") diff --git a/core/scripts/export_submissions.sql b/core/scripts/export_submissions.sql index e8c6200..ff703b7 100644 --- a/core/scripts/export_submissions.sql +++ b/core/scripts/export_submissions.sql @@ -1,8 +1,9 @@ -- Exports submissions and primary category from classic db. 
-- For use with process_submissions.py --- +-- -- mysql -u root -B arXiv < export_submissions.sql > submissions.tsv -SELECT sub.*, cat.category -FROM arXiv.arXiv_submissions sub, arXiv.arXiv_submission_category cat -WHERE sub.submission_id = cat.submission_id AND cat.is_primary = 1 +SELECT sub.*, cat.category, cat.is_primary +FROM arXiv.arXiv_submissions sub, arXiv.arXiv_submission_category cat +WHERE sub.submission_id = cat.submission_id +ORDER BY sub.submission_id DESC LIMIT 1000; diff --git a/core/scripts/process_submissions.py b/core/scripts/process_submissions.py index f7831bd..faf9d24 100644 --- a/core/scripts/process_submissions.py +++ b/core/scripts/process_submissions.py @@ -22,19 +22,26 @@ from flask import Flask -import events -from events.domain.submission import Submission -from events.services import classic +from arxiv.submission import save, domain, CreateSubmission, ConfirmAuthorship,\ + ConfirmContactInformation, ConfirmPolicy, SetTitle, SetAbstract, \ + SetComments, SetDOI, SetReportNumber, SetJournalReference, \ + SetUploadPackage, SetLicense, SetPrimaryClassification, \ + AddSecondaryClassification, SetAuthors, FinalizeSubmission, load -from events.exceptions import InvalidStack +from arxiv.submission.domain.submission import Submission +from arxiv.submission.services import classic + +from arxiv.submission.exceptions import InvalidEvent INVALID_STATUSES = ['0', '20', '29', '30'] + @contextmanager def in_memory_db(): """Provide an in-memory sqlite database for testing purposes.""" app = Flask('foo') app.config['CLASSIC_DATABASE_URI'] = 'sqlite://' + app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False with app.app_context(): classic.init_app(app) @@ -46,15 +53,21 @@ def in_memory_db(): finally: classic.drop_all() + def process_csv(tsvfile, session): """Process a tsvfile using DictReader.""" with open(tsvfile) as tsvfh: reader = DictReader(tsvfh, delimiter='\t') for submission in reader: + for key, value in submission.items(): + if value == 
'NULL': + submission[key] = None + if int(submission['event_version']) > 1: + continue try: submission_id = process_submission(submission) verify_submission(submission, submission_id) - except (AssertionError, InvalidStack) as e: + except InvalidEvent as e: logging.error('{}: {}'.format(submission['submission_id'], e)) @@ -66,33 +79,34 @@ def process_submission(s): except ValueError: forename = '' surname = s['submitter_name'] - submitter = events.domain.User(s['submitter_id'], email=s['submitter_email'], - forename=forename, surname=surname) + submitter = domain.User(s['submitter_id'], email=s['submitter_email'], + forename=forename, surname=surname, + endorsements=[s['category']]) - metadata = [ + metadata = dict([ ('title', s['title']), ('abstract', s['abstract']), ('comments', s['comments']), ('report_num', s['report_num']), ('doi', s['doi']), ('journal_ref', s['journal_ref']) - ] + ]) - submission, stack = events.save( - events.CreateSubmission(creator=submitter) + submission, stack = save( + CreateSubmission(creator=submitter) ) if s.get('is_author') == '1': - submission, stack = events.save( - events.AssertAuthorship( + submission, stack = save( + ConfirmAuthorship( creator=submitter, submitter_is_author=True ), submission_id=submission.submission_id ) else: - submission, stack = events.save( - events.AssertAuthorship( + submission, stack = save( + ConfirmAuthorship( creator=submitter, submitter_is_author=False ), @@ -100,48 +114,72 @@ def process_submission(s): ) if s.get('agree_policy') == '1': - submission, stack = events.save( - events.AcceptPolicy(creator=submitter), + submission, stack = save( + ConfirmPolicy(creator=submitter), submission_id=submission.submission_id ) if s.get('userinfo') == '1': - submission, stack = events.save( - events.VerifyContactInformation(creator=submitter), + submission, stack = save( + ConfirmContactInformation(creator=submitter), submission_id=submission.submission_id ) - submission, stack = events.save( - 
events.UpdateAuthors( + submission, stack = save( + SetAuthors( authors_display=s['authors'], creator=submitter ), - events.UpdateMetadata( - creator=submitter, - metadata=metadata - ), - events.SetPrimaryClassification( - creator=submitter, - category=s['category'] - ), + SetPrimaryClassification(creator=submitter, category=s['category']), submission_id=submission.submission_id ) - + if s['title']: + submission, stack = save( + SetTitle(creator=submitter, title=metadata['title']), + submission_id=submission.submission_id + ) + if s['abstract']: + submission, stack = save( + SetAbstract(creator=submitter, abstract=metadata['abstract']), + submission_id=submission.submission_id + ) + if metadata['comments']: + submission, stack = save( + SetComments(creator=submitter, comments=metadata['comments']), + submission_id=submission.submission_id + ) + + if metadata['journal_ref']: + submission, stack = save( + SetJournalReference(creator=submitter, + journal_ref=metadata['journal_ref']), + submission_id=submission.submission_id + ) + + if metadata['doi']: + submission, stack = save( + SetDOI(creator=submitter, doi=metadata['doi']), + submission_id=submission.submission_id + ) + + if metadata['report_num']: + submission, stack = save( + SetReportNumber(creator=submitter, + report_num=metadata['report_num']), + submission_id=submission.submission_id + ) + # Parse the license license_uri = s.get('license') if license_uri: - submission, stack = events.save( - events.SelectLicense( - creator=submitter, - license_uri=license_uri - ), + submission, stack = save( + SetLicense(creator=submitter, license_uri=license_uri), submission_id=submission.submission_id ) if s.get('package'): - submission, stack = events.save( - events.AttachSourceContent( - location='https://example.arxiv.org/' + s['package'], + submission, stack = save( + SetUploadPackage( format=s['source_format'], checksum='0', identifier=1, @@ -151,37 +189,41 @@ def process_submission(s): ) if s.get('status') not in 
INVALID_STATUSES: - submission, stack = events.save( - events.FinalizeSubmission( - creator=submitter - ), + submission, stack = save( + FinalizeSubmission(creator=submitter), submission_id=submission.submission_id ) return submission.submission_id - # If it goes to the end, then verify that results come in - # events.load() returns a submission object, then verify it looks as expected + # load() returns a submission object, then verify it looks as expected def verify_submission(s, submission_id): """Validate event database storage of classic db import data.""" - submission, stack = events.load(submission_id) - - assert submission.metadata.title == s['title'] - assert submission.metadata.abstract == s['abstract'] - assert submission.metadata.comments == s['comments'] - assert submission.metadata.report_num == s['report_num'] - assert submission.metadata.doi == s['doi'] - assert submission.metadata.journal_ref == s['journal_ref'] - + submission, stack = load(submission_id) + + if s['title']: + assert submission.metadata.title == SetTitle.cleanup(s['title']) + if s['abstract']: + assert submission.metadata.abstract == SetAbstract.cleanup(s['abstract']) + if s['comments']: + assert submission.metadata.comments == SetComments.cleanup(s['comments']) + if s['report_num']: + assert submission.metadata.report_num == SetReportNumber.cleanup(s['report_num']) + if s['doi']: + assert submission.metadata.doi == SetDOI.cleanup(s['doi']) + if s['journal_ref']: + assert submission.metadata.journal_ref == SetJournalReference.cleanup(s['journal_ref']) + if s.get('userinfo') == '1': - assert submission.submitter_contact_verified, "VerifyContactInformationError" + assert submission.submitter_contact_verified, \ + "ConfirmContactInformationError" else: assert not submission.submitter_contact_verified if s.get('agree_policy') == '1': - assert submission.submitter_accepts_policy, "AcceptPolicy Error" + assert submission.submitter_accepts_policy, "ConfirmPolicy Error" else: assert not 
submission.submitter_accepts_policy @@ -189,15 +231,18 @@ def verify_submission(s, submission_id): assert submission.license.uri == s['license'] if s.get('is_author') == '1': - assert submission.submitter_is_author, "AssertAuthorship not aligned: returns False, should be True" + assert submission.submitter_is_author, \ + "ConfirmAuthorship not aligned: returns False, should be True" else: - assert not submission.submitter_is_author, "AssertAuthorship does not match: returns True, should be False" + assert not submission.submitter_is_author, \ + "ConfirmAuthorship does not match: returns True, should be False" if s.get('status') not in INVALID_STATUSES: assert submission.status == Submission.SUBMITTED else: assert submission.status == Submission.WORKING + if __name__ == '__main__': parser = ArgumentParser() parser.add_argument('tsvfile', help='TSV file') diff --git a/core/setup.py b/core/setup.py index 6c97e71..38b9147 100644 --- a/core/setup.py +++ b/core/setup.py @@ -1,10 +1,34 @@ -"""Install arXiv submission event core package.""" +"""Install arXiv submission core package.""" from setuptools import setup, find_packages setup( - name='arxiv-submission-events', - version='0.2', - packages=find_packages(exclude=['test*']), - zip_safe=False + name='arxiv-submission-core', + version='0.7.1rc40', + packages=[f'arxiv.{package}' for package + in find_packages('arxiv')], + zip_safe=False, + install_requires=[ + 'arxiv-base>=0.15.7rc8', + 'arxiv-auth>=0.3.2rc3', + 'flask', + 'mysqlclient', + 'bleach', + 'unidecode', + 'python-dateutil', + 'sqlalchemy', + 'flask-sqlalchemy', + 'dataclasses', + 'celery==4.1.0', + 'kombu==4.1.0', + 'redis==2.10.6', + 'mypy_extensions==0.4.1', + 'requests==2.21.0', + 'semver==2.8.1', + 'retry==0.9.2', + 'pytz==2018.7', + 'backports-datetime-fromisoformat==1.0.0', + 'typing_extensions' + ], + include_package_data=True ) diff --git a/docker-compose.yml b/docker-compose.yml index 86ace92..af247f5 100644 --- a/docker-compose.yml +++ 
b/docker-compose.yml @@ -12,8 +12,6 @@ services: MYSQL_ROOT_PASSWORD: foorootpassword MYSQL_DATABASE: submission command: ["mysqld", "--character-set-server=utf8mb4", "--collation-server=utf8mb4_unicode_ci"] - ports: - - "3307:3306" submission-redis: image: redis @@ -21,29 +19,20 @@ services: networks: - arxiv-submission-local - # submission-localstack: - # image: atlassianlabs/localstack - # container_name: submission-localstack - # networks: - # - arxiv-submission-local - # ports: - # - "4568:4568" - # - "4569:4569" - # environment: - # USE_SSL: 'true' - # DEBUG: 'true' - submission-metadata: build: context: ./ dockerfile: Dockerfile-metadata - container_name: submission-metadata + container_name: metadata environment: + BASE_SERVER: localhost:8000 + EXTERNAL_URL_SCHEME: http REDIS_ENDPOINT: "submission-redis:6379" AWS_ACCESS_KEY_ID: "foo" AWS_SECRET_ACCESS_KEY: "bar" LOGLEVEL: 10 CLASSIC_DATABASE_URI: "mysql+mysqldb://foouser:foopass@submission-maria:3306/submission?charset=utf8" + JWT_SECRET: foosecret networks: - arxiv-submission-local depends_on: @@ -51,36 +40,158 @@ services: - submission-maria # - submission-localstack + # accounts-redis-sessions: + # image: grokzen/redis-cluster:4.0.9 + # container_name: accounts-redis-sessions + # networks: + # - arxiv-submission-local + # ports: + # - '127.0.0.1:7000:7000' + # - '127.0.0.1:7001:7001' + # - '127.0.0.1:7002:7002' + # - '127.0.0.1:7003:7003' + # - '127.0.0.1:7004:7004' + # - '127.0.0.1:7005:7005' + # - '127.0.0.1:7006:7006' + # environment: + # IP: "0.0.0.0" + + accounts-redis-sessions: + image: redis:4 + container_name: accounts-redis-sessions + networks: + - arxiv-submission-local - submission-authorization: - build: - context: ./authorization/ - dockerfile: Dockerfile - container_name: submission-authorization + accounts-maria: + image: mariadb:10.3 + container_name: accounts-maria + networks: + - arxiv-submission-local + environment: + MYSQL_USER: foouser + MYSQL_PASSWORD: foopass + MYSQL_ROOT_PASSWORD: 
foorootpassword + MYSQL_DATABASE: accounts + command: ["mysqld", "--character-set-server=utf8mb4", "--collation-server=utf8mb4_unicode_ci"] + + registry-maria: + image: mariadb:10.3 + container_name: registry-maria + networks: + - arxiv-submission-local environment: + BASE_SERVER: localhost:8000 + EXTERNAL_URL_SCHEME: http + MYSQL_USER: foouser + MYSQL_PASSWORD: foopass + MYSQL_ROOT_PASSWORD: foorootpassword + MYSQL_DATABASE: registry + command: ["mysqld", "--character-set-server=utf8mb4", "--collation-server=utf8mb4_unicode_ci"] + + # filemanager-maria: + # image: mariadb:10.3 + # container_name: filemanager-maria + # networks: + # - arxiv-submission-local + # environment: + # BASE_SERVER: localhost:8000 + # EXTERNAL_URL_SCHEME: http + # MYSQL_USER: foouser + # MYSQL_PASSWORD: foopass + # MYSQL_ROOT_PASSWORD: foorootpassword + # MYSQL_DATABASE: filemanager + # command: ["mysqld", "--character-set-server=utf8mb4", "--collation-server=utf8mb4_unicode_ci"] + + accounts: + image: arxiv/accounts:0.3 + container_name: accounts + environment: + BASE_SERVER: localhost:8000 + EXTERNAL_URL_SCHEME: http + REDIS_HOST: "accounts-redis-sessions" + REDIS_PORT: "6379" + REDIS_CLUSTER: 0 + LOGLEVEL: 10 + CLASSIC_DATABASE_URI: "mysql+mysqldb://foouser:foopass@accounts-maria:3306/accounts?charset=utf8" + JWT_SECRET: foosecret + CAPTCHA_SECRET: foocaptchasecret + CAPTCHA_FONT: /opt/arxiv/fonts/montserrat.ttf + CREATE_DB: 1 + networks: + - arxiv-submission-local + depends_on: + - accounts-redis-sessions + - accounts-maria + + registry: + image: arxiv/registry:0.1 + container_name: registry + environment: + BASE_SERVER: localhost:8000 + EXTERNAL_URL_SCHEME: http + REDIS_HOST: "accounts-redis-sessions" + REDIS_PORT: "6379" + REDIS_CLUSTER: "0" LOGLEVEL: 10 + REGISTRY_DATABASE_URI: "mysql+mysqldb://foouser:foopass@registry-maria:3306/registry?charset=utf8" + JWT_SECRET: foosecret + CREATE_DB: 1 + AUTHLIB_INSECURE_TRANSPORT: 1 networks: - arxiv-submission-local + depends_on: + - 
accounts-redis-sessions + - registry-maria + + # filemanager: + # image: arxiv/filemanager:0.1 + # container_name: filemanager + # command: uwsgi --http-socket :8000 -M -t 3000 --manage-script-name --buffer-size 65535 --processes 8 --threads 1 --async 100 --ugreen --mount /=wsgi.py --logformat "%(addr) %(addr) - %(user_id)|%(session_id) [%(rtime)] [%(uagent)] \"%(method) %(uri) %(proto)\" %(status) %(size) %(micros) %(ttfb)" + # + # environment: + # BASE_SERVER: localhost:8000 + # EXTERNAL_URL_SCHEME: http + # LOGLEVEL: 10 + # FILE_MANAGMENT_SQLALCHEMY_DATABASE_URI: "mysql+mysqldb://foouser:foopass@filemanager-maria:3306/filemanager?charset=utf8" + # JWT_SECRET: foosecret + # CREATE_DB: 1 + # networks: + # - arxiv-submission-local + # depends_on: + # - filemanager-maria - submission-gateway: + authenticator: + image: arxiv/authenticator:0.1 + container_name: authenticator + environment: + BASE_SERVER: localhost:8000 + EXTERNAL_URL_SCHEME: http + LOGLEVEL: 10 + REDIS_HOST: "accounts-redis-sessions" + REDIS_PORT: "6379" + REDIS_CLUSTER: "0" + JWT_SECRET: foosecret + networks: + - arxiv-submission-local + depends_on: + - accounts-redis-sessions + + gateway: build: context: ./gateway/ dockerfile: Dockerfile - container_name: submission-gateway - environment: - REDIS_ENDPOINT: "submission-redis:6379" - AWS_ACCESS_KEY_ID: "foo" - AWS_SECRET_ACCESS_KEY: "bar" - LOGLEVEL: 10 + container_name: gateway ports: - "8000:8000" networks: - arxiv-submission-local depends_on: - - submission-redis - - submission-metadata - - submission-authorization - # - submission-localstack + - accounts-redis-sessions + - authenticator + - registry + - accounts + + networks: arxiv-submission-local: diff --git a/docs/_images/submissionContainers.png b/docs/_images/submissionContainers.png deleted file mode 100644 index d904163..0000000 Binary files a/docs/_images/submissionContainers.png and /dev/null differ diff --git a/docs/_images/submissionState.png b/docs/_images/submissionState.png deleted 
file mode 100644 index eccaf7a..0000000 Binary files a/docs/_images/submissionState.png and /dev/null differ diff --git a/docs/_modules/api/context.html b/docs/_modules/api/context.html deleted file mode 100644 index 4fd1135..0000000 --- a/docs/_modules/api/context.html +++ /dev/null @@ -1,145 +0,0 @@ - - - - - - - api.context — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.context

-"""Helpers for working with Flask globals."""
-
-import os
-from typing import Union
-from flask import g, Flask
-from flask import current_app as flask_app
-import werkzeug
-
-
-
[docs]def get_application_config(app: Flask=None) -> Union[dict, os._Environ]: - """ - Get a configuration from the current app, or fall back to env. - - Parameters - ---------- - app : :class:`flask.Flask` - - Returns - ------- - dict-like - This is either the current Flask application configuration, or - ``os.environ``. Either of these should support the ``get()`` method. - """ - if app is not None: - if isinstance(app, Flask): - return app.config - if flask_app: # Proxy object; falsey if there is no application context. - return flask_app.config - return os.environ
- - -
[docs]def get_application_global() -> werkzeug.local.LocalProxy: - """ - Get the current application global proxy object. - - Returns - ------- - proxy or None - """ - if g: - return g - return None
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/controllers/submission.html b/docs/_modules/api/controllers/submission.html deleted file mode 100644 index 84edd7c..0000000 --- a/docs/_modules/api/controllers/submission.html +++ /dev/null @@ -1,290 +0,0 @@ - - - - - - - api.controllers.submission — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.controllers.submission

-"""Controllers for the external API."""
-
-import json
-from functools import wraps
-from datetime import datetime
-import copy
-from arxiv.base import logging
-from typing import Tuple, List, Callable, Optional
-
-from flask import url_for, current_app
-from werkzeug.exceptions import NotFound, BadRequest, InternalServerError
-
-from arxiv import status
-from events.domain.agent import Agent, agent_factory, System
-from events.domain import Event
-from events.domain.submission import Submission, Classification, License, \
-    SubmissionMetadata
-import events as ev
-
-from . import util
-
-logger = logging.getLogger(__name__)
-
-
-Response = Tuple[dict, int, dict]
-
-
-def _get_agents(headers: dict, user_data: dict, client_data: dict) \
-        -> Tuple[Agent, Agent, Optional[Agent]]:
-    user = ev.User(
-        native_id=user_data['user_id'],
-        email=user_data['email']
-    )
-    client = ev.Client(native_id=client_data['client_id'])
-    on_behalf_of = headers.get('X-On-Behalf-Of')
-    if on_behalf_of is not None:
-        proxy = user
-        user = ev.User(on_behalf_of, '', '')
-    else:
-        proxy = None
-    return user, client, proxy
-
-
-
[docs]@util.validate_request('schema/resources/submission.json') -def create_submission(data: dict, headers: dict, user_data: dict, - client_data: dict, token: str) -> Response: - """ - Create a new submission. - - Implements the hook for :meth:`sword.SWORDCollection.add_submission`. - - Parameters - ---------- - data : dict - Deserialized compact JSON-LD document. - headers : dict - Request headers from the client. - - Returns - ------- - dict - Response data. - int - HTTP status code. - dict - Headers to add to the response. - """ - logger.debug('Received request to create submission') - user, client, proxy = _get_agents(headers, user_data, client_data) - logger.debug(f'User: {user}; client: {client}, proxy: {proxy}') - - try: - submission, events = ev.save( - ev.CreateSubmissionEvent(creator=user, client=client, proxy=proxy), - *_update_submission(data, user, client, proxy) - ) - except ev.InvalidEvent as e: - raise InternalServerError(str(e)) from e - except ev.SaveError as e: - raise InternalServerError('Problem interacting with database') from e - except Exception as e: - raise InternalServerError('Encountered unhandled exception') from e - - response_headers = { - 'Location': url_for('submit.get_submission', - submission_id=submission.submission_id) - } - return submission.to_dict(), status.HTTP_201_CREATED, response_headers
- - -
[docs]def get_submission(submission_id: str, user: Optional[str] = None, - client: Optional[str] = None, - token: Optional[str] = None) -> Response: - """Retrieve the current state of a submission.""" - submission = ev.get_submission(submission_id) - return submission.to_dict(), status.HTTP_200_OK, {}
- - -
[docs]@util.validate_request('schema/resources/submission.json') -def update_submission(data: dict, headers: dict, user_data: dict, - client_data: dict, token: str, submission_id: str) \ - -> Response: - """Update the submission.""" - user, client, proxy = _get_agents(headers, user_data, client_data) - try: - submission, events = ev.save( - *_update_submission(data, user, client, proxy), - submission_id=submission_id - ) - except ev.NoSuchSubmission as e: - raise NotFound(f"No submission found with id {submission_id}") - except ev.InvalidEvent as e: - raise InternalServerError(str(e)) from e - except ev.SaveError as e: - raise InternalServerError('Problem interacting with database') from e - except Exception as e: - raise InternalServerError('Encountered unhandled exception') from e - - response_headers = { - 'Location': url_for('submit.get_submission', creator=user, - submission_id=submission.submission_id) - } - return submission.to_dict(), status.HTTP_200_OK, response_headers
- - -def _update_submission(data: dict, creator: Agent, client: Agent, - proxy: Optional[Agent] = None) -> List[Event]: - """ - Generate :class:`.ev.Event`(s) to update a :class:`Submission`. - - Parameters - ---------- - data : dict - creator : :class:`.Agent` - client : :class:`.Agent` - proxy : :class:`.Agent` - - Returns - ------- - list - - """ - # Since these are used in all Event instantiations, it's convenient to - # pack these together. - agents = dict(creator=creator, client=client, proxy=proxy) - - new_events = [] - if 'submitter_is_author' in data: - new_events.append( - ev.AssertAuthorshipEvent( - submitter_is_author=data['submitter_is_author'], - **agents, - ) - ) - if 'license' in data: - new_events.append( - ev.SelectLicenseEvent( - license_name=data['license'].get('name'), - license_uri=data['license']['uri'], - **agents - ) - ) - - if 'submitter_accepts_policy' in data and data['submitter_accepts_policy']: - new_events.append(ev.AcceptPolicyEvent(**agents)) - - # Generate both primary and secondary classifications. - if 'primary_classification' in data: - category = data['primary_classification']['category'] - new_events.append( - ev.SetPrimaryClassificationEvent(category=category, **agents) - ) - - for classification_datum in data.get('secondary_classification', []): - category = classification_datum['category'] - new_events.append( - ev.AddSecondaryClassificationEvent(category=category, **agents) - ) - - if 'metadata' in data: - metadata = [ - (key, data['metadata'][key]) - for key in SubmissionMetadata.FIELDS - if key in data['metadata'] - ] - new_events.append(ev.UpdateMetadataEvent(metadata=metadata, **agents)) - return new_events -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/controllers/tests.html b/docs/_modules/api/controllers/tests.html deleted file mode 100644 index da50369..0000000 --- a/docs/_modules/api/controllers/tests.html +++ /dev/null @@ -1,319 +0,0 @@ - - - - - - - api.controllers.tests — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.controllers.tests

-"""Tests for :mod:`api.controllers`."""
-
-from unittest import TestCase, mock
-from datetime import datetime
-from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
-
-from arxiv import status
-from events.domain import User, Submission
-from events import CreateSubmissionEvent, UpdateMetadataEvent, SaveError, \
-    InvalidEvent, NoSuchSubmission, SetPrimaryClassificationEvent
-from api.controllers import submission
-
-
-
[docs]def preserve_exceptions_and_events(mock_events): - """Add real exceptions back to the mock.""" - mock_events.SaveError = SaveError - mock_events.InvalidEvent = InvalidEvent - mock_events.NoSuchSubmission = NoSuchSubmission - mock_events.UpdateMetadataEvent = UpdateMetadataEvent - mock_events.CreateSubmissionEvent = CreateSubmissionEvent - mock_events.SetPrimaryClassificationEvent = \ - SetPrimaryClassificationEvent
- - -
[docs]class TestCreateSubmission(TestCase): - """Tests for :func:`.submission.create_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_create_submission_with_valid_data(self, mock_events, url_for): - """Create a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmissionEvent(creator=user)] - ) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - resp, stat, head = submission.create_submission(data, self.headers, - self.user_data, - self.client_data, - self.token) - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], CreateSubmissionEvent, - "Should pass a CreateSubmissionEvent first") - self.assertIsInstance(call_args[1], SetPrimaryClassificationEvent, - "Should pass a SetPrimaryClassificationEvent") - self.assertEqual(stat, status.HTTP_201_CREATED, - "Should return 201 Created when submission is" - " successfully created.") - self.assertIn('Location', head, "Should include a Location header.")
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_create_submission_with_invalid_data(self, mock_events, url_for): - """Trying to create a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'badkey': 'bizarre value', - } - with self.assertRaises(BadRequest): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_create_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_create_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = InvalidEvent - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- - -
[docs]class TestUpdateSubmission(TestCase): - """Tests for :func:`.submission.update_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_update_submission_with_valid_data(self, mock_events, url_for): - """Update a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmissionEvent(creator=user), - UpdateMetadataEvent(creator=user, - metadata=[('title', 'foo title')])] - ) - data = { - 'metadata': { - 'title': 'foo title' - } - } - resp, stat, head = submission.update_submission(data, self.headers, - self.user_data, - self.client_data, - self.token, 1) - self.assertEqual(stat, status.HTTP_200_OK, - "Should return 200 OK when submission is" - " successfully updated.") - self.assertIn('Location', head, "Should include a Location header.") - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], UpdateMetadataEvent, - "Should pass an UpdateMetadataEvent")
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_update_nonexistant_submission(self, mock_events, url_for): - """Trying to update a nonexistant submission throws exception.""" - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = NoSuchSubmission - url_for.return_value = '/foo/' - data = { - 'metadata': { - 'title': 'foo title' - } - } - with self.assertRaises(NotFound): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_update_submission_with_invalid_data(self, mock_events, url_for): - """Trying to update a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'badkey': 'bizarre value', - } - with self.assertRaises(BadRequest): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_update_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('api.controllers.submission.url_for') - @mock.patch('api.controllers.submission.ev') - def test_update_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = InvalidEvent - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- - -
[docs]class TestGetSubmission(TestCase): - """Tests for :func:`.submission.get_submission`."""
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/controllers/util.html b/docs/_modules/api/controllers/util.html deleted file mode 100644 index 9d00225..0000000 --- a/docs/_modules/api/controllers/util.html +++ /dev/null @@ -1,154 +0,0 @@ - - - - - - - api.controllers.util — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.controllers.util

-import os
-from typing import Callable, Tuple, Any
-from functools import wraps
-from werkzeug.exceptions import BadRequest
-from arxiv.util import schema
-
-
-Response = Tuple[dict, int, dict]
-
-
-
[docs]def validate_request(schema_path: str) -> Callable: - """ - Generate a route decorator that validates the request body. - - Parameters - ---------- - schema_path : str - Path (absolute, or relative to the execution path) to the JSON Schema - document. - Returns - ------- - decorator - Decorates a controller function with request body validation against - the specified JSON Schema. - - - """ - schema_path = os.path.join( - os.path.abspath(os.path.dirname(__file__)), - '../..', - schema_path - ) - validate = schema.load(schema_path) - - def _decorator(func: Callable) -> Callable: - @wraps(func) - def _wrpr(data: dict, *args: Any, **kwargs: Any) -> Response: - try: - validate(data) - except schema.ValidationError as e: - # A summary of the exception is on the first line of the repr. - msg = str(e).split('\n') - detail = { - 'reason': f'Metadata validation failed: {msg[0]}', - 'detail': ' '.join(msg) - } - raise BadRequest(msg[0], detail) - response: Tuple[dict, int, dict] = func(data, *args, **kwargs) - return response - return _wrpr - return _decorator
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/exceptions.html b/docs/_modules/api/exceptions.html deleted file mode 100644 index 1681e95..0000000 --- a/docs/_modules/api/exceptions.html +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - - api.exceptions — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.exceptions

-
[docs]class ValidationError(ValueError): - pass
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/factory.html b/docs/_modules/api/factory.html deleted file mode 100644 index 8c8068b..0000000 --- a/docs/_modules/api/factory.html +++ /dev/null @@ -1,124 +0,0 @@ - - - - - - - api.factory — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.factory

-"""Application factory for references service components."""
-
-from arxiv.base import logging
-
-from flask import Flask
-
-from api import routes
-from arxiv.base.middleware import wrap
-
-from authorization import middleware as auth
-
-
-
[docs]def create_web_app() -> Flask: - """Initialize an instance of the extractor backend service.""" - app = Flask('api') - app.config.from_pyfile('config.py') - - app.register_blueprint(routes.blueprint) - - wrap(app, [auth.AuthMiddleware]) - return app
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/api/routes.html b/docs/_modules/api/routes.html deleted file mode 100644 index df41526..0000000 --- a/docs/_modules/api/routes.html +++ /dev/null @@ -1,186 +0,0 @@ - - - - - - - api.routes — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for api.routes

-"""Provides External REST API."""
-
-from arxiv.base import logging
-from typing import Callable, Union
-from functools import wraps
-from flask.json import jsonify
-from flask import Blueprint, current_app, redirect, request, url_for, g, \
-    Response
-
-from authorization.decorators import scoped
-from arxiv import status
-from api.controllers import submission
-
-logger = logging.getLogger(__name__)
-
-blueprint = Blueprint('submit', __name__, url_prefix='/submit')
-
-
-
[docs]def json_response(func): - """Generate a wrapper for routes that JSONifies the response body.""" - @wraps(func) - def wrapper(*args, **kwargs): - r_body, r_status, r_headers = func(*args, **kwargs) - return jsonify(r_body), r_status, r_headers - return wrapper
- - -
[docs]@blueprint.route('/', methods=['POST']) -@json_response -@scoped('submission:write') -def create_submission() -> Union[str, Response]: - """Accept new submissions.""" - return submission.create_submission( - request.get_json(), - dict(request.headers), - user=g.user, - client=g.client, - token=g.token - )
- - -
[docs]@blueprint.route('/<string:submission_id>/', methods=['GET']) -@json_response -@scoped('submission:read') -def get_submission(submission_id: str) -> tuple: - """Get the current state of a submission.""" - return submission.get_submission( - submission_id, - user=g.user, - client=g.client, - token=g.token - )
- -# -# @blueprint.route('/<string:submission_id>/history/', methods=['GET']) -# @authorization.scoped(authorization.READ) -# @json_response -# def get_submission_history(submission_id: str) -> tuple: -# """Get the event log for a submission.""" -# return submission.get_submission_log( -# request.get_json(), -# dict(request.headers), -# submission_id=submission_id, -# user=g.user, -# client=g.client, -# scope=g.scope, -# token=g.token -# ) - - -
[docs]@blueprint.route('/<string:submission_id>/', methods=['POST']) -@json_response -@scoped('submission:write') -def update_submission(submission_id: str) -> tuple: - """Update the submission.""" - return submission.update_submission( - submission_id, - request.get_json(), - dict(request.headers), - user=g.user, - client=g.client, - token=g.token - )
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events.html b/docs/_modules/events.html deleted file mode 100644 index 1b829d6..0000000 --- a/docs/_modules/events.html +++ /dev/null @@ -1,379 +0,0 @@ - - - - - - - events — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events

-"""
-Core event-centric data abstraction for the submission & moderation subsystem.
-
-This package provides an event-based API for CRUD operations on submissions
-and submission-related (meta)data. Management of submission content (i.e.
-source files) is out of scope.
-
-Rather than perform CRUD operations directly on submission objects, all
-operations that modify submission data are performed through the creation of
-submission events. This ensures that we have a precise and complete record of
-activities concerning submissions, and provides an explicit definition of
-operations that can be performed within the arXiv submission system.
-
-Event classes are defined in :mod:`events.domain.event`, and are accessible
-from the root namespace of this package. Each event type defines a
-transformation/operation on a single submission, and defines the data required
-to perform that operation. Events are played forward, in order, to derive the
-state of a submission. For more information about how event types are defined,
-see :class:`events.domain.event.Event`.
-
-Using events
-============
-
-Event types are `PEP 557 data classes
-<https://www.python.org/dev/peps/pep-0557/>`_. Each event type inherits from
-:class:`.Event`, and may add additional fields. See :class:`.Event` for more
-information about common fields.
-
-To create a new event, initialize the class with the relevant
-data, and commit the event using :func:`.save`. For example:
-
-.. code-block:: python
-
-   >>> import events
-   >>> user = events.User(123, "joe@bloggs.com")
-   >>> metadata = [('title', 'A new theory of foo')]
-   >>> update = events.UpdateMetadata(creator=user, metadata=metadata)
-   >>> submission = events.save(creation, submission_id=12345)
-
-
-Several things will occur:
-
-1. If the events are for a submission that already exists, the latest state of
-   that submission will be obtained.
-2. New events will be validated and applied to the submission in the order that
-   they were passed to :func:`.save`. If an event is invalid (e.g. the
-   submission is not in an appropriate state for the operation), an
-   :class:`.InvalidEvent` exception will be raised. Note that at this point
-   nothing has been changed in the database; the attempt is simply abandoned.
-3. The new events are stored in the database, as is the latest state of the
-   submission. Creation of events and creation/update of the submission are
-   performed as a single atomic transaction. If anything goes wrong during the
-   update operation, all changes are abandoned and a :class:`.RuntimeError`
-   exception is raised.
-4. If the notification service is configured, a message about the event is
-   propagated as a Kinesis event on the configured stream. See
-   :mod:`events.services.notification` for details.
-
-
-Special case: creation
-----------------------
-Note that if the first event is a :class:`.CreateSubmission` the
-submission ID need not be provided, as we won't know what it is yet. For
-example:
-
-.. code-block:: python
-
-   import events
-
-   >>> user = events.User(123, "joe@bloggs.com")
-   >>> creation = events.CreateSubmission(creator=user)
-   >>> metadata = [('title', 'A new theory of foo')]
-   >>> update = events.UpdateMetadata(creator=user, metadata=metadata)
-   >>> submission = events.save(creation, update)
-   >>> submission.submission_id
-   40032
-
-
-"""
-
-from typing import Optional, List, Tuple
-from arxiv.base import logging
-from events.domain.submission import Submission, SubmissionMetadata, Author
-from events.domain.agent import Agent, User, System, Client
-from events.domain.event import (
-    Event, CreateSubmission, RemoveSubmission, VerifyContactInformation,
-    AssertAuthorship, AcceptPolicy, SetPrimaryClassification, UpdateMetadata,
-    AddSecondaryClassification, RemoveSecondaryClassification, SelectLicense,
-    AttachSourceContent, UpdateAuthors, CreateComment, DeleteComment,
-    AddDelegate, RemoveDelegate, FinalizeSubmission
-)
-from events.domain.rule import RuleCondition, RuleConsequence, EventRule
-from events.services import classic
-from events.exceptions import InvalidEvent, NoSuchSubmission, SaveError
-
-logger = logging.getLogger(__name__)
-
-
-
[docs]def load(submission_id: str) -> Tuple[Submission, List[Event]]: - """ - Load a submission and its history. - - Parameters - ---------- - submission_id : str - Submission identifier. - - Returns - ------- - :class:`events.domain.submission.Submission` - The current state of the submission. - list - Items are :class:`.Event`s, in order of their occurrence. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised when a submission with the passed ID cannot be found. - """ - try: - return classic.get_submission(submission_id) - except classic.NoSuchSubmission as e: - raise NoSuchSubmission(f'No submission with id {submission_id}') from e
- - -
[docs]def save(*events: Event, submission_id: Optional[str] = None) \ - -> Tuple[Submission, List[Event]]: - """ - Commit a set of new :class:`.Event`s for a submission. - - This will persist the events to the database, along with the final - state of the submission, and generate external notification(s) on the - appropriate channels. - - Parameters - ---------- - events : :class:`.Event` - Events to apply and persist. - submission_id : int - The unique ID for the submission, if available. If not provided, it is - expected that ``events`` includes a :class:`.CreateSubmission`. - - Returns - ------- - :class:`events.domain.submission.Submission` - The state of the submission after all events (including rule-derived - events) have been applied. Updated with the submission ID, if a - :class:`.CreateSubmission` was included. - list - A list of :class:`.Event` instances applied to the submission. Note - that this list may contain more events than were passed, if event - rules were triggered. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised if ``submission_id`` is not provided and the first event is not - a :class:`.CreateSubmission`, or ``submission_id`` is provided but - no such submission exists. - :class:`.InvalidEvent` - If an invalid event is encountered, the entire operation is aborted - and this exception is raised. - :class:`.SaveError` - There was a problem persisting the events and/or submission state - to the database. - - """ - if len(events) == 0: - raise ValueError('Must pass at least one event') - - # Do some sanity checks before proceeding. - for event in events: - if submission_id is not None: - if event.submission_id is None: - event.submission_id = submission_id - if event.submission_id != submission_id: - raise InvalidEvent(event, - "Can't mix events for multiple submissions") - - # We want to play events from the beginning. 
- if submission_id is not None: - existing_events = classic.get_events(submission_id) - else: - existing_events = [] - combined = existing_events + list(events) - - # Load any relevant event rules for this submission. - rules = [] # database.get_rules(submission_id) - - # Calculate the state of the submission from old and new events. - submission, combined = _apply_events(combined, rules) - - # Update the submission ID to ensure the existing submission is updated. - if submission.submission_id is None: - submission.submission_id = submission_id # May still be None. - - # Persist in database; submission ID is updated after transaction. - try: - submission = classic.store_events(*combined, submission=submission) - except classic.CommitFailed as e: - logger.debug('Encountered CommitFailed exception: %s', str(e)) - raise SaveError('Failed to store events') from e - - for event in combined: - event.submission_id = submission.submission_id - return submission, combined
- - -def _apply_rules(submission: Submission, event: Event, - rules: List[EventRule]) -> List[Event]: - """Generate new event(s) by applying rules to a submission event.""" - def _apply(rule: EventRule) -> bool: - return rule.condition(submission, event) - return [ - rule.consequence(submission, event) for rule in filter(_apply, rules) - ] - - -def _apply_events(events: List[Event], rules: List[EventRule], - submission: Optional[Submission] = None) \ - -> Tuple[Submission, List[Event]]: - """ - Apply a set of events in order. - - Parameters - ---------- - events : list - Items are :class:`.Event` instances. - rules : list - Items are :class:`.EventRule` instances. - submission : :class:`.Submission` or None - Starting state from which to begin applying ``events``. If - ``submission`` is not provided, ``events`` must contain a - :class:`.CreateSubmission`. - - Returns - ------- - :class:`.Submission` - Submission state after events have been applied. - list - Items are :class:`.Event`s that have been applied, including any - additional events generated by ``rules``. - - Raises - ------ - :class:`.NoSuchSubmission` - If ``submission`` is not provided, and the first event is not a - :class:`.CreateSubmission`, there's not much else to go on. - :class:`.InvalidEvent` - If an invalid event is encountered, the entire operation is aborted - and this exception is raised. - - """ - events = sorted(events, key=lambda e: e.created) - - # Need either a creation event or a submission state from which to start. - if not isinstance(events[0], CreateSubmission) and submission is None: - raise NoSuchSubmission('No creation, and submission not provided') - - extra_events: List[Event] = [] # Generated by applied rules. 
- for event in events: - if not event.valid(submission): - raise InvalidEvent(event) - - if isinstance(event, CreateSubmission): - submission = event.apply() - else: - submission = event.apply(submission) - - if not event.committed: # Don't create duplicate rule-derived events. - # Any rule-derived events should be applied before moving on. - _extra = _apply_rules(submission, event, rules) - if len(_extra) > 0: - submission, _extra = _apply_events(_extra, rules, submission) - extra_events += _extra - return submission, sorted(events + extra_events, key=lambda e: e.created) -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/context.html b/docs/_modules/events/context.html deleted file mode 100644 index 4914fb2..0000000 --- a/docs/_modules/events/context.html +++ /dev/null @@ -1,148 +0,0 @@ - - - - - - - events.context — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.context

-"""Helpers for working with Flask globals."""
-
-import os
-from typing import Optional, Union
-from flask import g, Flask
-from flask import current_app as flask_app
-import werkzeug
-
-
-
[docs]def get_application_config(app: Flask = None) -> Union[dict, os._Environ]: - """ - Get a configuration from the current app, or fall back to env. - - Parameters - ---------- - app : :class:`flask.Flask` - - Returns - ------- - dict-like - This is either the current Flask application configuration, or - ``os.environ``. Either of these should support the ``get()`` method. - """ - # pylint: disable=protected-access - if app is not None: - if isinstance(app, Flask): - return app.config # type: ignore - if flask_app: # Proxy object; falsey if there is no application context. - return flask_app.config # type: ignore - return os.environ
- - -
[docs]def get_application_global() -> Optional[werkzeug.local.LocalProxy]: - """ - Get the current application global proxy object. - - Returns - ------- - proxy or None - """ - if g: - return g # type: ignore - return None
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/domain/agent.html b/docs/_modules/events/domain/agent.html deleted file mode 100644 index beac200..0000000 --- a/docs/_modules/events/domain/agent.html +++ /dev/null @@ -1,242 +0,0 @@ - - - - - - - events.domain.agent — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.domain.agent

-"""Data structures for agents."""
-
-import hashlib
-from typing import Any, Optional
-
-from dataclasses import dataclass, field
-from dataclasses import asdict
-
-__all__ = ('Agent', 'User', 'System', 'Client', 'agent_factory')
-
-
-
[docs]@dataclass -class Agent: - """ - Base class for agents in the submission system. - - An agent is an actor/system that generates/is responsible for events. - """ - - native_id: str - """Type-specific identifier for the agent. This might be an URI.""" - - FIELDS = [] - - @property - def agent_type(self): - """The name of the agent instance's class.""" - return self.get_agent_type() - -
[docs] @classmethod - def get_agent_type(cls): - """Get the name of the instance's class.""" - return cls.__name__
- - @property - def agent_identifier(self): - """ - Unique identifier for the agent instance. - - Based on both the agent type and native ID. - """ - h = hashlib.new('sha1') - h.update(b'%s:%s' % (self.agent_type.encode('utf-8'), - str(self.native_id).encode('utf-8'))) - return h.hexdigest() - -
[docs] @classmethod - def from_dict(cls, data: dict) -> Any: - """Instantiate an :class:`.Agent` instance from a dict.""" - agent_type = data.pop('agent_type', None) - native_id = data.pop('native_id', None) - if agent_type is None and type(cls) is Agent: - raise ValueError('agent_type not provided') - return agent_factory(agent_type, native_id, **data)
- - def __eq__(self, other: Any) -> bool: - """Equality comparison for agents based on type and identifier.""" - if not isinstance(other, self.__class__): - return False - return self.agent_identifier == other.agent_identifier - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Agent`.""" - return { - 'native_id': self.native_id, - 'agent_type': self.agent_type - }
- - -
[docs]@dataclass -class User(Agent): - """An (human) end user.""" - - email: str - forename: str = field(default_factory=str) - surname: str = field(default_factory=str) - suffix: str = field(default_factory=str) - identifier: Optional[str] = field(default=None) - affiliation: str = field(default_factory=str) - - FIELDS = [ - 'email', 'forename', 'surname', 'suffix', 'identifier', 'affiliation' - ] - - @property - def name(self): - """Full name of the user.""" - return f"{self.forename} {self.surname} {self.suffix}" - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.User`.""" - data = super(User, self).to_dict() - data['name'] = self.name - data['forename'] = self.name - data['surname'] = self.name - data['suffix'] = self.name - data['email'] = self.email - data['identifier'] = self.identifier - data['affiliation'] = self.affiliation - return data
- - -# TODO: extend this to support arXiv-internal services. -
[docs]@dataclass -class System(Agent): - """The submission application (this application).""" - - FIELDS = []
- - -
[docs]@dataclass -class Client(Agent): - """A non-human third party, usually an API client.""" - - FIELDS = [] - -
[docs] def to_dict(self): - """Generate a dict representation of this :class:`.Client` instance.""" - return { - 'client_id': self.native_id - }
- - -_agent_types = { - User.get_agent_type(): User, - System.get_agent_type(): System, - Client.get_agent_type(): Client, -} - - -
[docs]def agent_factory(agent_type: str, native_id: Any, **extra) -> Agent: - """Instantiate a subclass of :class:`.Agent`.""" - if agent_type not in _agent_types: - raise ValueError(f'No such agent type: {agent_type}') - klass = _agent_types[agent_type] - extra = {k: v for k, v in extra.items() if k in klass.FIELDS} - return klass(native_id=native_id, **extra)
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/domain/event.html b/docs/_modules/events/domain/event.html deleted file mode 100644 index 4d85daa..0000000 --- a/docs/_modules/events/domain/event.html +++ /dev/null @@ -1,655 +0,0 @@ - - - - - - - events.domain.event — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.domain.event

-"""
-Data structures for submissions events.
-
-- Events have unique identifiers generated from their data (creation, agent,
-  submission).
-- Events provide methods to update a submission based on the event data.
-- Events provide validation methods for event data.
--
-"""
-
-import hashlib
-from datetime import datetime
-from typing import Optional, TypeVar, List, Tuple, Any, Dict
-from urllib.parse import urlparse
-from dataclasses import dataclass, field
-from dataclasses import asdict
-
-from arxiv.util import schema
-
-from .agent import Agent
-from .submission import Submission, SubmissionMetadata, Author, \
-    Classification, License, Delegation, Comment, Flag, Proposal, \
-    SubmissionContent
-
-from events.exceptions import InvalidEvent
-
-
-
[docs]@dataclass -class Event: - """Base class for submission-related events.""" - - creator: Agent - """ - The agent responsible for the operation represented by this event. - - This is **not** necessarily the creator of the submission. - """ - - created: datetime = field(default_factory=datetime.now) - """ - The timestamp when the event was originally committed. - - This should generally not be set from outside this package. - """ - - proxy: Optional[Agent] = field(default=None) - """ - The agent who facilitated the operation on behalf of the :prop:`.creator`. - - This may be an API client, or another user who has been designated as a - proxy. Note that proxy implies that the creator was not directly involved. - """ - - client: Optional[Agent] = field(default=None) - """ - The client through which the :prop:`.creator` performed the operation. - - If the creator was directly involved in the operation, this property should - be the client that facilitated the operation. - """ - - submission_id: Optional[int] = field(default=None) - """ - The primary identifier of the submission being operated upon. - - This is defined as optional to support creation events, and to facilitate - chaining of events with creation events in the same transaction. - """ - - committed: bool = field(default=False) - """ - Indicates whether the event has been committed to the database. - - This should generally not be set from outside this package. - """ - - @property - def event_type(self) -> str: - """The name (str) of the event type.""" - return self.get_event_type() - -
[docs] @classmethod - def get_event_type(cls) -> str: - """Get the name (str) of the event type.""" - return cls.__name__
- - @property - def event_id(self) -> str: - """The unique ID for this event.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.created.isoformat().encode('utf-8'), - self.event_type.encode('utf-8'), - self.creator.agent_identifier.encode('utf-8'))) - return h.hexdigest() - -
[docs] def valid(self, submission: Submission) -> bool: - """Determine whether this event is valid for the submission.""" - if submission and submission.published: - raise InvalidEvent(self, "Cannot alter a published submission") - - if not hasattr(self, 'validate'): - return True - try: - self.validate(submission) - except InvalidEvent: - return False - return True
- -
[docs] def apply(self, submission: Optional[Submission] = None) -> Submission: - """Apply the projection for this :class:`.Event` instance.""" - if submission: - submission = self.project(submission) - else: - submission = self.project() - submission.updated = self.created - return submission
- -
[docs] def to_dict(self): - """Generate a dict representation of this :class:`.Event`.""" - data = asdict(self) - data.update({ - 'creator': self.creator.to_dict(), - 'created': self.created.isoformat(), - }) - return data
- - -# Events related to the creation of a new submission. -# -# These are largely the domain of the metadata API, and the submission UI. - - -
[docs]@dataclass(init=False) -class CreateSubmission(Event): - """Creation of a new :class:`events.domain.submission.Submission`.""" - -
[docs] def project(self) -> Submission: - """Create a new :class:`.Submission`.""" - return Submission(creator=self.creator, created=self.created, - owner=self.creator, proxy=self.proxy, - client=self.client)
- - -
[docs]@dataclass(init=False) -class RemoveSubmission(Event): - """Removal of a :class:`events.domain.submission.Submission`.""" - -
[docs] def project(self, submission: Submission) -> Submission: - """Remove the :class:`.Submission` from the system (set inactive).""" - submission.active = False - return submission
- - -
[docs]@dataclass(init=False) -class VerifyContactInformation(Event): - """Submitter has verified their contact information.""" - -
[docs] def project(self, submission: Submission) -> Submission: - """Update :prop:`.Submission.submitter_contact_verified`.""" - submission.submitter_contact_verified = True - return submission
- - -
[docs]@dataclass -class AssertAuthorship(Event): - """The submitting user asserts whether they are an author of the paper.""" - - submitter_is_author: bool = True - -
[docs] def project(self, submission: Submission) -> Submission: - """Update the authorship flag on the submission.""" - submission.submitter_is_author = self.submitter_is_author - return submission
- - -
[docs]@dataclass -class AcceptPolicy(Event): - """The submitting user accepts the arXiv submission policy.""" - -
[docs] def project(self, submission: Submission) -> Submission: - """Set the policy flag on the submission.""" - submission.submitter_accepts_policy = True - return submission
- - -
[docs]@dataclass -class SetPrimaryClassification(Event): - """Update the primary classification of a submission.""" - - category: Optional[str] = None - - # TODO: this should validate against the arXiv taxonomy. -
[docs] def validate(self, submission: Submission) -> None: - """Validate the primary classification category.""" - try: - assert self.category - except AssertionError as e: - raise InvalidEvent(e) from e
- -
[docs] def project(self, submission: Submission) -> Submission: - """Set :prop:`.Submission.primary_classification`.""" - submission.primary_classification = Classification( - category=self.category - ) - return submission
- - -
[docs]@dataclass -class AddSecondaryClassification(Event): - """Add a secondary :class:`.Classification` to a submission.""" - - category: Optional[str] = field(default=None) - - # TODO: this should validate against the arXiv taxonomy. -
[docs] def validate(self, submission: Submission) -> None: - """Validate the secondary classification category to add.""" - try: - assert self.category - except AssertionError as e: - raise InvalidEvent(e) from e
- -
[docs] def project(self, submission: Submission) -> Submission: - """Append to :prop:`.Submission.secondary_classification`.""" - submission.secondary_classification.append(Classification( - category=self.category - )) - return submission
- - -
[docs]@dataclass -class RemoveSecondaryClassification(Event): - """Remove secondary :class:`.Classification` from submission.""" - - category: Optional[str] = field(default=None) - - # TODO: this should validate against the arXiv taxonomy. -
[docs] def validate(self, submission: Submission) -> None: - """Validate the secondary classification category to remove.""" - try: - assert self.category - except AssertionError as e: - raise InvalidEvent(e) from e
- -
[docs] def project(self, submission: Submission) -> Submission: - """Remove from :prop:`.Submission.secondary_classification`.""" - submission.secondary_classification = [ - classn for classn in submission.secondary_classification - if not classn.category == self.category - ] - return submission
- - -
[docs]@dataclass -class SelectLicense(Event): - """The submitter has selected a license for their submission.""" - - license_name: Optional[str] = field(default=None) - license_uri: Optional[str] = field(default=None) - -
[docs] def project(self, submission: Submission) -> Submission: - """Set :prop:`.Submission.license`.""" - submission.license = License( - name=self.license_name, - uri=self.license_uri - ) - return submission
- - -# TODO: consider representing some of these as distinct events/commands? -
[docs]@dataclass -class UpdateMetadata(Event): - """Update the descriptive metadata for a submission.""" - - metadata: List[Tuple[str, Any]] = field(default_factory=list) - - FIELDS = [ - 'title', 'abstract', 'doi', 'msc_class', 'acm_class', - 'report_num', 'journal_ref' - ] - - # TODO: implement more specific validation here. -
[docs] def validate(self, submission: Submission) -> None: - """The :prop:`.metadata` should be a list of tuples.""" - try: - assert len(self.metadata) >= 1 - assert type(self.metadata[0]) in [tuple, list] - for metadatum in self.metadata: - assert len(metadatum) == 2 - except AssertionError as e: - raise InvalidEvent(self) from e
- -
[docs] def project(self, submission: Submission) -> Submission: - """Update metadata on a :class:`.Submission`.""" - for key, value in self.metadata: - setattr(submission.metadata, key, value) - return submission
- - -
[docs]@dataclass -class UpdateAuthors(Event): - """Update the authors on a :class:`.Submission`.""" - - authors: List[Author] = field(default_factory=list) - -
[docs] def project(self, submission: Submission) -> Submission: - """Replace :prop:`.Submission.metadata.authors`.""" - submission.metadata.authors = self.authors - return submission
- -
[docs] @classmethod - def from_dict(cls, **data) -> Submission: - """Override the default ``from_dict`` constructor to handle authors.""" - if 'authors' not in data: - raise ValueError('Missing authors') - data['authors'] = [Author(**au) for au in data['authors']] - return cls(**data)
- - -
[docs]@dataclass -class AttachSourceContent(Event): - """Add metadata about a source package to a submission.""" - - location: str = field(default_factory=str) - format: str = field(default_factory=str) - checksum: str = field(default_factory=str) - mime_type: str = field(default_factory=str) - identifier: Optional[int] = field(default=None) - size: int = field(default=0) - - # TODO: This should be configurable somewhere. - ALLOWED_FORMATS = [ - 'pdftex', 'tex', 'pdf', 'ps', 'html', 'invalid' - ] - ALLOWED_MIME_TYPES = [ - 'application/tar+gzip', 'application/tar', 'application/zip' - ] - -
[docs] def validate(self, submission: Submission) -> None: - """Validate data for :class:`.SubmissionContent`.""" - try: - parsed = urlparse(self.location) - except ValueError as e: - raise InvalidEvent('Not a valid URL') from e - if not parsed.netloc.endswith('arxiv.org'): - raise InvalidEvent('External URLs not allowed.') - - if self.format not in self.ALLOWED_FORMATS: - raise InvalidEvent(f'Format {self.package_format} not allowed') - if not self.checksum: - raise InvalidEvent('Missing checksum') - if not self.identifier: - raise InvalidEvent('Missing upload ID')
- -
[docs] def project(self, submission: Submission) -> Submission: - """Replace :class:`.SubmissionContent` metadata on the submission.""" - submission.source_content = SubmissionContent( - location=self.location, - format=self.format, - checksum=self.checksum, - identifier=self.identifier, - mime_type=self.mime_type, - size=self.size - ) - return submission
- - -
[docs]@dataclass -class FinalizeSubmission(Event): - """Send the submission to the queue for announcement.""" - -
[docs] def validate(self, submission: Submission) -> None: - """Ensure that all required data/steps are complete.""" - if submission.finalized: - raise InvalidEvent(self, "Submission already finalized") - if not submission.active: - raise InvalidEvent(self, "Submision must be active") - - try: - assert submission.creator is not None - assert submission.primary_classification is not None - assert submission.metadata.title is not None - assert submission.metadata.abstract is not None - assert len(submission.metadata.authors) > 0 - assert submission.submitter_contact_verified - assert submission.submitter_accepts_policy - assert submission.license is not None - assert submission.source_content is not None - except AssertionError as e: - raise InvalidEvent(self, "Submission missing required data") from e
- -
[docs] def project(self, submission: Submission) -> Submission: - """Set :prop:`Submission.finalized`.""" - submission.finalized = True - return submission
- - -# Moderation-related events. - -
[docs]@dataclass -class CreateComment(Event): - """Creation of a :class:`.Comment` on a :class:`.Submission`.""" - - read_scope = 'submission:moderate' - write_scope = 'submission:moderate' - - body: str = field(default_factory=str) - scope: str = 'private' - -
[docs] def validate(self, submission: Submission) -> None: - """The :prop:`.body` should be set.""" - if not self.body: - raise ValueError('Comment body not set')
- -
[docs] def project(self, submission: Submission) -> Submission: - """Create a new :class:`.Comment` and attach it to the submission.""" - comment = Comment(creator=self.creator, created=self.created, - proxy=self.proxy, submission=submission, - body=self.body, scope=self.scope) - submission.comments[comment.comment_id] = comment - return submission
- - -
[docs]@dataclass -class DeleteComment(Event): - """Deletion of a :class:`.Comment` on a :class:`.Submission`.""" - - read_scope = 'submission:moderate' - write_scope = 'submission:moderate' - - comment_id: str = field(default_factory=str) - -
[docs] def validate(self, submission: Submission) -> None: - """The :prop:`.comment_id` must present on the submission.""" - if self.comment_id is None: - raise InvalidEvent('comment_id is required') - if not hasattr(submission, 'comments') or not submission.comments: - raise InvalidEvent('Cannot delete comment that does not exist') - if self.comment_id not in submission.comments: - raise InvalidEvent('Cannot delete comment that does not exist')
- -
[docs] def project(self, submission: Submission) -> Submission: - """Remove the comment from the submission.""" - del submission.comments[self.comment_id] - return submission
- - -
[docs]@dataclass -class AddDelegate(Event): - """Owner delegates authority to another agent.""" - - delegate: Optional[Agent] = None - -
[docs] def validate(self, submission: Submission) -> None: - """The event creator must be the owner of the submission.""" - if not self.creator == submission.owner: - raise InvalidEvent('Event creator must be submission owner')
- -
[docs] def project(self, submission: Submission) -> Submission: - """Add the delegate to the submission.""" - delegation = Delegation( - creator=self.creator, - delegate=self.delegate, - created=self.created - ) - submission.delegations[delegation.delegation_id] = delegation - return submission
- - -
[docs]@dataclass -class RemoveDelegate(Event): - """Owner revokes authority from another agent.""" - - delegation_id: str = field(default_factory=str) - -
[docs] def validate(self, submission: Submission) -> None: - """The event creator must be the owner of the submission.""" - if not self.creator == submission.owner: - raise InvalidEvent('Event creator must be submission owner')
- -
[docs] def project(self, submission: Submission) -> Submission: - """Remove the delegate from the submission.""" - if self.delegation_id in submission.delegations: - del submission.delegations[self.delegation_id] - return submission
- - -# class CreateSourcePackage(Event): -# pass -# -# class UpdateSourcePackage(Event): -# pass -# -# -# class DeleteSourcePackage(Event): -# pass -# -# -# class Annotation(Event): -# pass -# -# -# class CreateFlagEvent(AnnotationEvent): -# pass -# -# -# class DeleteFlagEvent(AnnotationEvent): -# pass -# -# -# class DeleteCommentEvent(AnnotationEvent): -# pass -# -# -# class CreateProposalEvent(AnnotationEvent): -# pass -# -# -# class DeleteProposalEvent(AnnotationEvent): -# pass - -EVENT_TYPES = { - obj.get_event_type(): obj for obj in locals().values() - if type(obj) is type and issubclass(obj, Event) -} - - -
[docs]def event_factory(event_type: str, **data) -> Event: - """ - Convenience factory for generating :class:`.Event`s. - - Parameters - ---------- - event_type : str - Should be the name of a :class:`.Event` subclass. - data : kwargs - Keyword parameters passed to the event constructor. - - Return - ------ - :class:`.Event` - An instance of an :class:`.Event` subclass. - """ - if 'created' not in data: - data['created'] = datetime.now() - if event_type in EVENT_TYPES: - klass = EVENT_TYPES[event_type] - if hasattr(klass, 'from_dict'): - return klass.from_dict(**data) - return EVENT_TYPES[event_type](**data) - raise RuntimeError('Unknown event type: %s' % event_type)
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/domain/rule.html b/docs/_modules/events/domain/rule.html deleted file mode 100644 index ee1a353..0000000 --- a/docs/_modules/events/domain/rule.html +++ /dev/null @@ -1,189 +0,0 @@ - - - - - - - events.domain.rule — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.domain.rule

-"""
-Conditional business logic as data.
-
-This is here for demonstration purposes only, and is likely to change
-substantially in the short term.
-"""
-
-from datetime import datetime
-from typing import Callable, TypeVar, Optional
-
-from dataclasses import dataclass, field
-from dataclasses import asdict
-
-from .agent import Agent, System
-from .event import Event, event_factory
-from .submission import Submission
-
-
-EventRuleType = TypeVar('EventRuleType', bound='EventRule')
-
-
-
[docs]@dataclass -class RuleCondition: - """Evaluate whether or not the rule applies to an event.""" - - event_type: type - submission_id: Optional[int] = None - extra_condition: Optional[dict] = None - - def __call__(self, submission: Submission, event: Event) -> bool: - """Evaluate whether or not the rule applies to an event.""" - return type(event) is self.event_type and \ - self._callable_from_condition(submission, event) \ - and (self.submission_id is None - or self.submission_id == submission.submission_id) - - # TODO: implement some kind of DSL for evaluating submission state? - @property - def _callable_from_condition(self) -> Callable: - return lambda sub, event: True
- - -
[docs]@dataclass -class RuleConsequence: - """Generate a new event as a result of the rule.""" - - event_type: type - """The type of event to apply when the rule is triggered.""" - event_data: dict - """Data for the event applied when the rule is triggered.""" - - event_creator: Agent = field(default_factory=System) - """The agent responsible for the consequent event.""" - - def __call__(self, submission: Submission, event: Event) -> Event: - """Generate a new event as a result of the rule.""" - data = { # These are effectively defaults. - 'creator': self.event_creator, - 'proxy': None, - 'submission_id': submission.submission_id - } - data.update(self.event_data) - data['created'] = datetime.now() - # new_event = event_factory(self.event_type, **data) - new_event = self.event_type(**data) - if new_event.submission_id is None: - new_event.submission_id = submission.submission_id - if new_event.creator is None: - new_event.creator = self.event_creator - return new_event
- - -
[docs]@dataclass -class EventRule: - """Expresses conditional business logic to generate automated events.""" - - creator: Agent - condition: RuleCondition - consequence: RuleConsequence - rule_id: Optional[int] = None - proxy: Optional[Agent] = None - created: datetime = field(default_factory=datetime.now) - applied: bool = False - """Whether or not the rule has already been triggered and applied."""
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/domain/submission.html b/docs/_modules/events/domain/submission.html deleted file mode 100644 index 33997ca..0000000 --- a/docs/_modules/events/domain/submission.html +++ /dev/null @@ -1,382 +0,0 @@ - - - - - - - events.domain.submission — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.domain.submission

-"""Data structures for submissions."""
-
-import hashlib
-from typing import Optional, Dict, TypeVar, List
-from datetime import datetime
-
-from dataclasses import dataclass, field
-from dataclasses import asdict
-
-from .agent import Agent
-
-
-
[docs]@dataclass -class Classification: - """An archive/category classification for a :class:`.Submission`.""" - - category: str - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Classification`.""" - return asdict(self)
- - -
[docs]@dataclass -class License: - """An license for distribution of the submission.""" - - uri: str - name: Optional[str] = None - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.License`.""" - return asdict(self)
- - -
[docs]@dataclass -class Author: - """Represents an author of a submission.""" - - order: int - forename: str = field(default_factory=str) - surname: str = field(default_factory=str) - initials: str = field(default_factory=str) - affiliation: str = field(default_factory=str) - email: str = field(default_factory=str) - identifier: Optional[str] = None - - def __post_init__(self) -> None: - """Auto-generate an identifier, if not provided.""" - if not self.identifier: - self.identifier = self._generate_identifier() - - def _generate_identifier(self): - h = hashlib.new('sha1') - h.update(bytes(':'.join([self.forename, self.surname, self.initials, - self.affiliation, self.email]), - encoding='utf-8')) - return h.hexdigest() - - @property - def canonical(self): - """Canonical representation of the author name.""" - name = "%s %s %s" % (self.forename, self.initials, self.surname) - name = name.replace(' ', ' ') - if self.affiliation: - return "%s (%s)" % (name, self.affiliation) - return name - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Author`.""" - data = asdict(self) - data['canonical'] = self.canonical - return data
- - -
[docs]@dataclass -class SubmissionContent: - """Metadata about the submission source package and compiled products.""" - - location: str - format: str - mime_type: str - size: str - checksum: str - identifier: int
- - -
[docs]@dataclass -class SubmissionMetadata: - """Metadata about a :class:`.Submission` instance.""" - - title: Optional[str] = None - abstract: Optional[str] = None - - authors: list = field(default_factory=list) - - doi: Optional[str] = None - msc_class: Optional[str] = None - acm_class: Optional[str] = None - report_num: Optional[str] = None - journal_ref: Optional[str] = None - - comments: str = field(default_factory=str) - - @property - def authors_canonical(self): - """Canonical representation of submission authors.""" - return ", ".join([au.canonical for au in self.authors]) - -
[docs] def to_dict(self) -> dict: - """Generate dict representation of :class:`.SubmissionMetadata`.""" - data = asdict(self) - data['authors_canonical'] = self.authors_canonical - return data
- - -
[docs]@dataclass -class Delegation: - """Delegation of editing privileges to a non-owning :class:`.Agent`.""" - - delegate: Agent - creator: Agent - created: datetime = field(default_factory=datetime.now) - - @property - def delegation_id(self): - """Unique identifer for the delegation instance.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.delegate.agent_identifier, - self.creator.agent_identifier, - self.created.isodate())) - return h.hexdigest() - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Delegation`.""" - data = asdict(self) - data['delegation_id'] = self.delegation_id - return data
- - -
[docs]@dataclass -class Submission: - """Represents an arXiv submission object.""" - - WORKING = 'working' - PROCESSING = 'processing' - SUBMITTED = 'submitted' - ON_HOLD = 'hold' - SCHEDULED = 'scheduled' - PUBLISHED = 'published' - DELETED = 'deleted' - - creator: Agent - owner: Agent - created: datetime - updated: Optional[datetime] = field(default=None) - - source_content: Optional[SubmissionContent] = field(default=None) - compiled_content: List[SubmissionContent] = field(default_factory=list) - - primary_classification: Optional[Classification] = field(default=None) - delegations: Dict[str, Delegation] = field(default_factory=dict) - proxy: Optional[Agent] = field(default=None) - client: Optional[Agent] = field(default=None) - submission_id: Optional[int] = field(default=None) - metadata: SubmissionMetadata = field(default_factory=SubmissionMetadata) - active: bool = field(default=True) - """Actively moving through the submission workflow.""" - - finalized: bool = field(default=False) - """Submitter has indicated submission is ready for publication.""" - - published: bool = field(default=False) - secondary_classification: List[Classification] = \ - field(default_factory=list) - submitter_contact_verified: bool = field(default=False) - submitter_is_author: bool = field(default=True) - submitter_accepts_policy: bool = field(default=False) - license: Optional[License] = field(default=None) - status: str = field(default=WORKING) - arxiv_id: Optional[str] = field(default=None) - """The published arXiv paper ID.""" - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Submission`.""" - data = asdict(self) - data.update({ - 'creator': self.creator.to_dict(), - 'owner': self.owner.to_dict(), - 'created': self.created.isoformat(), - }) - if self.client: - data.update({'client': self.client.to_dict()}) - if self.primary_classification: - data['primary_classification'] = \ - self.primary_classification.to_dict() - if self.delegations: - data['delegations'] = { - key: delegation.to_dict() - for key, delegation in self.delegations.items() - } - if self.proxy: - data['proxy'] = self.proxy.to_dict() - if self.metadata: - data['metadata'] = self.metadata.to_dict() - if self.license: - data['license'] = self.license.to_dict() - return data
- - -
[docs]@dataclass -class Annotation: - """Auxilliary metadata used by the submission and moderation process.""" - - creator: Agent - submission: Submission - created: datetime - scope: str # TODO: document this. - proxy: Optional[Agent] - - @property - def annotation_type(self): - """Name (str) of the type of annotation.""" - return type(self).__name__ - - @property - def annotation_id(self): - """The unique identifier for an :class:`.Annotation` instance.""" - h = hashlib.new('sha1') - h.update(b'%s:%s:%s' % (self.created.isoformat().encode('utf-8'), - self.annotation_type.encode('utf-8'), - self.creator.agent_identifier.encode('utf-8'))) - return h.hexdigest() - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Annotation`.""" - data = asdict(self) - data['annotation_type'] = self.annotation_type - data['annotation_id'] = self.annotation_id - return data
- - -
[docs]@dataclass -class Proposal(Annotation): - """Represents a proposal to apply an event to a submission.""" - - event_type: type - event_data: dict - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Proposal`.""" - return asdict(self)
- - -
[docs]@dataclass -class Comment(Annotation): - """A freeform textual annotation.""" - - body: str - - @property - def comment_id(self): - """The unique identifier for a :class:`.Comment` instance.""" - return self.annotation_id - -
[docs] def to_dict(self) -> dict: - """Generate a dict representation of this :class:`.Comment`.""" - data = asdict(self) - data['comment_id'] = self.comment_id - return data
- - -
[docs]@dataclass -class Flag(Annotation): - """Tags used to route submissions based on moderation policies.""" - - pass
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/exceptions.html b/docs/_modules/events/exceptions.html deleted file mode 100644 index 81fc69e..0000000 --- a/docs/_modules/events/exceptions.html +++ /dev/null @@ -1,128 +0,0 @@ - - - - - - - events.exceptions — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.exceptions

-"""Exceptions raised during event handling."""
-
-from typing import TypeVar
-
-EventType = TypeVar('EventType', bound='core.events.domain.event.Event')
-
-
-
[docs]class InvalidEvent(ValueError): - """Raised when an invalid event is encountered.""" - - def __init__(self, event: EventType, extra: str='') -> None: - """Use the :class:`.Event` to build an error message.""" - self.event: EventType = event - msg = f"Invalid event: {event.event_type} ({event.event_id}): {extra}" - super(InvalidEvent, self).__init__(msg)
- - -
[docs]class NoSuchSubmission(Exception): - """An operation was performed on/for a submission that does not exist."""
- - -
[docs]class SaveError(RuntimeError): - """Failed to persist event state."""
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/classic.html b/docs/_modules/events/services/classic.html deleted file mode 100644 index e21a7c4..0000000 --- a/docs/_modules/events/services/classic.html +++ /dev/null @@ -1,412 +0,0 @@ - - - - - - - events.services.classic — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.classic

-"""
-Integration with the classic database to persist events and submission state.
-
-As part of the classic renewal strategy, development of new submission
-interfaces must maintain data interoperability with classic components. This
-service module must therefore do two main things:
-
-1. Store and provide access to event data generated during the submission
-   process, and
-2. Keep the classic database tables up to date so that "downstream" components
-   can continue to operate. Since classic components work directly on
-   submission tables, persisting events and resulting submission state must
-   occur in the same transaction.
-
-An additional challenge is representing changes to submission state made by
-classic components, since those changes will be made directly to submission
-tables and not involve event-generation. See :func:`get_submission` for
-details.
-
-ORM representations of the classic database tables involved in submission
-are located in :mod:`.classic.models`. An additional model, :class:`.DBEvent`,
-is defined in the current module.
-"""
-
-from typing import List, Optional, Generator, Dict, Union, Tuple
-from contextlib import contextmanager
-
-from flask import Flask
-from sqlalchemy import Column, String, DateTime, ForeignKey, \
-    create_engine
-from sqlalchemy.ext.indexable import index_property
-from sqlalchemy.orm import relationship
-from sqlalchemy.engine import Engine
-from sqlalchemy.ext.declarative import declarative_base
-from sqlalchemy.orm import sessionmaker
-from sqlalchemy.orm.session import Session
-
-from arxiv.base import logging
-from events.domain.event import Event, event_factory
-from events.domain.submission import License, Submission
-from events.domain.agent import User, Client, Agent
-from . import models, util
-from .models import Base
-from .exceptions import NoSuchSubmission, CommitFailed, ClassicBaseException
-from arxiv.base.globals import get_application_config, get_application_global
-
-logger = logging.getLogger(__name__)
-
-
-
[docs]class DBEvent(Base): # type: ignore - """Database representation of an :class:`.Event`.""" - - __tablename__ = 'event' - - event_id = Column(String(40), primary_key=True) - event_type = Column(String(255)) - proxy = Column(util.FriendlyJSON) - proxy_id = index_property('proxy', 'agent_identifier') - - creator = Column(util.FriendlyJSON) - creator_id = index_property('creator', 'agent_identifier') - - created = Column(DateTime) - data = Column(util.FriendlyJSON) - submission_id = Column( - ForeignKey('arXiv_submissions.submission_id'), - index=True - ) - - submission = relationship("Submission") - -
[docs] def to_event(self) -> Event: - """ - Instantiate an :class:`.Event` using event data from this instance. - - Returns - ------- - :class:`.Event` - - """ - _skip = ['creator', 'proxy', 'submission_id', 'created', 'event_type'] - data = { - key: value for key, value in self.data.items() - if key not in _skip - } - data['committed'] = True, # Since we're loading from the DB. - return event_factory( - self.event_type, - creator=Agent.from_dict(self.creator), - proxy=Agent.from_dict(self.proxy) if self.proxy else None, - submission_id=self.submission_id, - created=self.created, - **data - )
- - -
[docs]@contextmanager -def transaction() -> Generator: - """Context manager for database transaction.""" - session = current_session() - try: - yield session - session.commit() - except ClassicBaseException as e: - logger.debug('Commit failed, rolling back: %s', str(e)) - session.rollback() - raise # Propagate exceptions raised from this module. - except Exception as e: - logger.debug('Commit failed, rolling back: %s', str(e)) - session.rollback() - raise CommitFailed('Failed to commit transaction') from e
- - -
[docs]def get_licenses() -> List[License]: - """Get a list of :class:`.License`s available for new submissions.""" - license_data = current_session().query(models.License) \ - .filter(models.License.active == '1') - return [License(uri=row.name, name=row.label) for row in license_data]
- - -
[docs]def get_events(submission_id: int) -> List[Event]: - """ - Load events from the classic database. - - Parameters - ---------- - submission_id : int - - Returns - ------- - list - Items are :class:`.Event` instances loaded from the class DB. - - Raises - ------ - :class:`.NoSuchSubmission` - Raised when there are no events for the provided submission ID. - - """ - with transaction() as session: - event_data = session.query(DBEvent) \ - .filter(DBEvent.submission_id == submission_id) \ - .order_by(DBEvent.created) - if not event_data: # No events, no dice. - raise NoSuchSubmission(f'Submission {submission_id} not found') - return [datum.to_event() for datum in event_data]
- - -
[docs]def get_submission(submission_id: int) -> Tuple[Submission, List[Event]]: - """ - Get the current state of a :class:`.Submission` from the database. - - In the medium term, services that use this package will need to - play well with legacy services that integrate with the classic - database. For example, the moderation system does not use the event - model implemented here, and will therefore cause direct changes to the - submission tables that must be reflected in our representation of the - submission. - - Until those legacy components are replaced, we will need to load both the - event stack and the current DB state of the submission, and use the DB - state to patch fields that may have changed outside the purview of the - event model. - - Parameters - ---------- - submission_id : int - - Returns - ------- - :class:`.Submission` - """ - # Load and play events. Eventually, this is the only query we will make - # against the database. - events = get_events(submission_id) - submission = None # We assume that the first event is a creation. - for ev in events: - submission = ev.apply(submission) if submission else ev.apply() - - with transaction() as session: - # Load the current db state of the submission, and patch. Once we have - # retired legacy components that do not follow the event model, this - # step should be removed. - data = session.query(models.Submission).get(submission_id) - if data is None: - raise NoSuchSubmission(f'Submission {submission_id} not found') - return data.patch(submission), events
- - -
[docs]def store_events(*events: Event, submission: Submission) -> Submission: - """ - Store events in the database. - - Parameters - ---------- - events : list - A list of (presumably new) :class:`.Event` instances to be persisted. - Events that have already been committed will not be committed again, - so it's safe to include them here. - submission : :class:`.Submission` - Current state of the submission (after events have been applied). - - Returns - ------- - :class:`.Submission` - Stored submission, updated with current submission ID. - """ - # Commit new events for a single submission in a transaction. - with transaction() as session: - # We need a reference to this row for the event rows, so we add it - # first. - if submission.submission_id is None: - db_submission = models.Submission() - else: - db_submission = session.query(models.Submission)\ - .get(submission.submission_id) - if db_submission is None: - raise RuntimeError("Submission ID is set, but can't find data") - - # Update the submission state from the Submission domain object. - db_submission.update_from_submission(submission) - session.add(db_submission) - - for event in events: - if event.committed: # Don't create duplicate event entries. - continue - - if event.committed: - raise RuntimeError('Event is already committed') - db_event = DBEvent( - event_type=event.event_type, - event_id=event.event_id, - data=event.to_dict(), - created=event.created, - creator=event.creator.to_dict(), - proxy=event.proxy.to_dict() if event.proxy else None, - submission_id=event.submission_id - ) - session.add(db_event) - db_event.submission = db_submission # Will be updated on commit. - event.committed = True - submission.submission_id = db_submission.submission_id - return submission
- - -
[docs]def init_app(app: object = None) -> None: - """Set default configuration parameters for an application instance.""" - config = get_application_config(app) - config.setdefault('CLASSIC_DATABASE_URI', 'sqlite://')
- - -
[docs]def get_engine(app: object = None) -> Engine: - """Get a new :class:`.Engine` for the classic database.""" - config = get_application_config(app) - database_uri = config.get('CLASSIC_DATABASE_URI', 'sqlite://') - return create_engine(database_uri)
- - -# TODO: consider making this private. -
[docs]def get_session(app: object = None) -> Session: - """Get a new :class:`.Session` for the classic database.""" - engine = current_engine() - return sessionmaker(bind=engine)()
- - -
[docs]def current_engine() -> Engine: - """Get/create :class:`.Engine` for this context.""" - g = get_application_global() - if not g: - return get_engine() - if 'classic_engine' not in g: - g.classic_engine = get_engine() # type: ignore - return g.classic_engine # type: ignore
- - -
[docs]def current_session() -> Session: - """Get/create :class:`.Session` for this context.""" - g = get_application_global() - if not g: - return get_session() - if 'classic' not in g: - g.classic = get_session() # type: ignore - return g.classic # type: ignore
- - -
[docs]def create_all() -> None: - """Create all tables in the database.""" - Base.metadata.create_all(current_engine())
- - -
[docs]def drop_all() -> None: - """Drop all tables in the database.""" - Base.metadata.drop_all(current_engine())
- - -# # TODO: find a better way! -# def _declare_event() -> type: -# """ -# Define DBEvent model. -# -# This is deferred until runtime so that we can inject an alternate model -# for testing. This is less than ideal, but (so far) appears to be the only -# way to effectively replace column data types, which we need in order to -# use JSON columns with SQLite. -# """ -# -# return DBEvent -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/classic/exceptions.html b/docs/_modules/events/services/classic/exceptions.html deleted file mode 100644 index 866546f..0000000 --- a/docs/_modules/events/services/classic/exceptions.html +++ /dev/null @@ -1,120 +0,0 @@ - - - - - - - events.services.classic.exceptions — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.classic.exceptions

-"""Exceptions raised by :mod:`events.services.classic`."""
-
-
-
[docs]class ClassicBaseException(RuntimeError): - """Base for classic service exceptions."""
- - -
[docs]class NoSuchSubmission(ClassicBaseException): - """A request was made for a submission that does not exist."""
- - -
[docs]class CommitFailed(ClassicBaseException): - """Raised when there was a problem committing changes to the database."""
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/classic/models.html b/docs/_modules/events/services/classic/models.html deleted file mode 100644 index 2a5aa9e..0000000 --- a/docs/_modules/events/services/classic/models.html +++ /dev/null @@ -1,796 +0,0 @@ - - - - - - - events.services.classic.models — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.classic.models

-"""SQLAlchemy ORM classes for the classic database."""
-
-import json
-from typing import Optional
-from datetime import datetime
-from sqlalchemy import Column, Date, DateTime, Enum, ForeignKey, Text, text, \
-    ForeignKeyConstraint, Index, Integer, SmallInteger, String, Table
-from sqlalchemy.orm import relationship, joinedload
-
-from sqlalchemy.ext.declarative import declarative_base
-
-from events import domain
-
-Base = declarative_base()
-
-
-
class Submission(Base):    # type: ignore
    """
    Represents an arXiv submission.

    Maps the ``arXiv_submissions`` table and translates between that row
    representation and :class:`.domain.Submission`.
    """

    __tablename__ = 'arXiv_submissions'

    # Values for the ``stage`` column.
    NEW = 0
    STARTED = 1
    FILES_ADDED = 2
    PROCESSED = 3
    METADATA_ADDED = 4
    SUBMITTED = 5
    STAGES = [NEW, STARTED, FILES_ADDED, PROCESSED, METADATA_ADDED, SUBMITTED]

    # Values for the ``status`` column.
    # NOTE(review): ``SUBMITTED`` is rebound here, so ``Submission.SUBMITTED``
    # is 1 (the status code) while ``STAGES`` above captured the stage value
    # 5. Left unchanged because callers may rely on the attribute value.
    NOT_SUBMITTED = 0   # Working.
    SUBMITTED = 1       # Enqueued for moderation, to be scheduled.
    ON_HOLD = 2
    UNUSED = 3
    NEXT_DAY = 4        # Scheduled for tomorrow.
    PROCESSING = 5
    NEEDS_EMAIL = 6

    PUBLISHED = 7
    DELETED_PUBLISHED = 27
    """Published and files expired."""

    PROCESSING_SUBMISSION = 8
    REMOVED = 9

    USER_DELETED = 10
    ERROR_STATE = 19

    DELETED_EXPIRED = 20
    """Was working but expired."""
    DELETED_ON_HOLD = 22
    DELETED_PROCESSING = 25

    DELETED_REMOVED = 29
    DELETED_USER = 30
    """User deleted and files expired."""

    # Status codes that :meth:`_get_status` reports as deleted.
    DELETED = [
        USER_DELETED, DELETED_ON_HOLD, DELETED_PROCESSING,
        DELETED_REMOVED, DELETED_USER
    ]

    # Values for the ``type`` column.
    NEW_SUBMSSION = 'new'
    NEW_SUBMISSION = NEW_SUBMSSION  # Correctly spelled alias; old name kept.
    REPLACEMENT = 'rep'
    JOURNAL_REFERENCE = 'jref'
    WITHDRAWAL = 'dr'

    WITHDRAWN_FORMAT = 'withdrawn'

    submission_id = Column(Integer, primary_key=True)
    document_id = Column(
        ForeignKey('arXiv_documents.document_id',
                   ondelete='CASCADE',
                   onupdate='CASCADE'),
        index=True
    )
    doc_paper_id = Column(String(20), index=True)
    sword_id = Column(ForeignKey('arXiv_tracking.sword_id'), index=True)
    userinfo = Column(Integer, server_default=text("'0'"))
    is_author = Column(Integer, nullable=False, server_default=text("'0'"))
    agree_policy = Column(Integer, server_default=text("'0'"))
    viewed = Column(Integer, server_default=text("'0'"))
    stage = Column(Integer, server_default=text("'0'"))
    submitter_id = Column(
        ForeignKey('tapir_users.user_id', ondelete='CASCADE',
                   onupdate='CASCADE'),
        index=True
    )
    submitter_name = Column(String(64))
    submitter_email = Column(String(64))
    created = Column(DateTime)
    updated = Column(DateTime)
    status = Column(Integer, nullable=False, index=True,
                    server_default=text("'0'"))
    sticky_status = Column(Integer)
    must_process = Column(Integer, server_default=text("'1'"))
    submit_time = Column(DateTime)
    release_time = Column(DateTime)
    source_size = Column(Integer, server_default=text("'0'"))
    source_format = Column(String(12))
    source_flags = Column(String(12))
    has_pilot_data = Column(Integer)
    is_withdrawn = Column(Integer, nullable=False, server_default=text("'0'"))
    title = Column(Text)
    authors = Column(Text)
    comments = Column(Text)
    proxy = Column(String(255))
    report_num = Column(Text)
    msc_class = Column(String(255))
    acm_class = Column(String(255))
    journal_ref = Column(Text)
    doi = Column(String(255))
    abstract = Column(Text)
    license = Column(ForeignKey('arXiv_licenses.name', onupdate='CASCADE'),
                     index=True)
    version = Column(Integer, nullable=False, server_default=text("'1'"))
    type = Column(String(8), index=True)
    is_ok = Column(Integer, index=True)
    admin_ok = Column(Integer)
    allow_tex_produced = Column(Integer, server_default=text("'0'"))
    is_oversize = Column(Integer, server_default=text("'0'"))
    remote_addr = Column(String(16), nullable=False, server_default=text("''"))
    remote_host = Column(String(255), nullable=False,
                         server_default=text("''"))
    package = Column(String(255), nullable=False, server_default=text("''"))
    rt_ticket_id = Column(Integer, index=True)
    auto_hold = Column(Integer, server_default=text("'0'"))

    document = relationship('Document')
    arXiv_license = relationship('License')
    submitter = relationship('User')
    sword = relationship('Tracking')
    categories = relationship('SubmissionCategory',
                              back_populates="submission", lazy='joined')

    def patch(self, submission: domain.Submission) -> domain.Submission:
        """
        Patch a :class:`.Submission` with data outside the event scope.

        There are several fields that may change after a submission enters the
        classic moderation and publication system, that cannot be inferred
        from the event stack.

        Parameters
        ----------
        submission : :class:`.domain.Submission`
            The submission object to patch.

        Returns
        -------
        :class:`.domain.Submission`
            The same submission that was passed; now patched with data outside
            the scope of the event model.

        """
        # Status changes.
        submission.status = self._get_status()
        # This must be a plain bool: a trailing comma here would turn the
        # value into a one-element tuple, which is always truthy.
        submission.active = (
            submission.status not in [submission.DELETED, submission.PUBLISHED]
        )
        submission.published = (submission.status == submission.PUBLISHED)
        submission.arxiv_id = self._get_arxiv_id()

        # Possible reclassification.
        primary = self.primary_classification
        if primary:
            submission.primary_classification = domain.Classification(
                category=primary.category
            )
        submission.secondary_classification = [
            domain.Classification(category=db_cat.category)
            for db_cat in self.categories
            if db_cat.is_primary == 0
        ]

        # Comments (admins may modify).
        submission.metadata.comments = self.comments
        return submission

    def to_submission(self) -> domain.Submission:
        """
        Generate a representation of submission state from a DB instance.

        The related ``submitter`` row is used as both creator and owner.

        Returns
        -------
        :class:`.domain.Submission`

        """
        status = self._get_status()
        primary = self.primary_classification
        submitter = domain.User(
            native_id=self.submitter.user_id,
            email=self.submitter.email,
            forename=self.submitter.first_name,
            surname=self.submitter.last_name,
            suffix=self.submitter.suffix_name
        )
        return domain.Submission(
            creator=submitter,
            owner=submitter,
            created=self.created,
            updated=self.updated,
            submitter_is_author=bool(self.is_author),
            submitter_accepts_policy=bool(self.agree_policy),
            submitter_contact_verified=bool(self.userinfo),
            status=status,
            finalized=(status != domain.Submission.WORKING),
            active=(status not in [domain.Submission.DELETED,
                                   domain.Submission.PUBLISHED]),
            published=(status == domain.Submission.PUBLISHED),
            metadata=domain.SubmissionMetadata(
                title=self.title,
                abstract=self.abstract,
                comments=self.comments,
                report_num=self.report_num,
                doi=self.doi,
                msc_class=self.msc_class,
                acm_class=self.acm_class,
                journal_ref=self.journal_ref
            ),
            license=domain.License(
                uri=self.arXiv_license.name,
                name=self.arXiv_license.label
            ) if self.arXiv_license else None,
            primary_classification=domain.Classification(
                category=primary.category
            ) if primary else None,
            secondary_classification=[
                domain.Classification(category=db_cat.category)
                for db_cat in self.categories
                if db_cat.is_primary == 0
            ]
        )

    def update_from_submission(self, submission: domain.Submission) -> None:
        """
        Update this database object from a :class:`.domain.Submission`.

        Parameters
        ----------
        submission : :class:`.domain.Submission`
            Source of the submitter, metadata, license, source-content and
            classification values written onto this row.

        """
        self.submitter_id = submission.creator.native_id
        self.submitter_name = submission.creator.name
        self.submitter_email = submission.creator.email
        self.is_author = int(submission.submitter_is_author)
        self.agree_policy = int(submission.submitter_accepts_policy)
        self.userinfo = int(submission.submitter_contact_verified)
        self.created = submission.created
        self.updated = datetime.now()
        self.title = submission.metadata.title
        self.abstract = submission.metadata.abstract
        self.authors = submission.metadata.authors_canonical
        self.comments = submission.metadata.comments
        self.report_num = submission.metadata.report_num
        self.doi = submission.metadata.doi
        self.msc_class = submission.metadata.msc_class
        self.acm_class = submission.metadata.acm_class
        self.journal_ref = submission.metadata.journal_ref
        if submission.license:
            self.license = submission.license.uri
        # We're not handling other types here. ``type`` is a string column,
        # so use the 'new' type code rather than the integer stage ``NEW``.
        self.type = Submission.NEW_SUBMSSION

        if submission.source_content is not None:
            self.must_process = 0
            self.source_size = submission.source_content.size
            self.source_format = submission.source_content.format

        # Only update the submission state if we're transitioning for the first
        # time. We can relax this later, but for now it will prevent us from
        # doing something stupid. Status codes are ints: compare by value.
        if submission.finalized and self.status == Submission.NOT_SUBMITTED:
            self.status = Submission.SUBMITTED
            self.submit_time = submission.updated

        if submission.primary_classification:
            self._update_primary(submission)
        self._update_secondaries(submission)

        self._update_submitter(submission)

    @property
    def primary_classification(self) -> Optional['SubmissionCategory']:
        """Get the primary classification for this submission, if any."""
        categories = [
            db_cat for db_cat in self.categories if db_cat.is_primary == 1
        ]
        try:
            return categories[0]
        except IndexError:
            return None

    def _get_arxiv_id(self) -> Optional[str]:
        """Return the related document's ``paper_id``, or None if unset."""
        if not self.document:
            return None
        return self.document.paper_id

    def _get_status(self) -> Optional[str]:
        """
        Map classic status codes to :class:`.domain.Submission` status.

        Returns None for status codes that have no mapping here.
        """
        if self._get_arxiv_id() is not None:
            return domain.Submission.PUBLISHED
        elif self.status == self.NOT_SUBMITTED:
            return domain.Submission.WORKING
        elif self.status == self.SUBMITTED:
            return domain.Submission.PROCESSING
        elif self.status == self.ON_HOLD:
            return domain.Submission.ON_HOLD
        elif self.status == self.NEXT_DAY:
            return domain.Submission.SCHEDULED
        elif self.status in self.DELETED:
            return domain.Submission.DELETED
        # TODO: raise something?
        return None

    def _update_submitter(self, submission: domain.Submission) -> None:
        """Update submitter information."""
        self.submitter_id = submission.creator.native_id

    def _update_primary(self, submission: domain.Submission) -> None:
        """Update primary classification."""
        primary_category = submission.primary_classification.category
        cur_primary = self.primary_classification

        if cur_primary and cur_primary.category != primary_category:
            self.categories.remove(cur_primary)
            # The replacement must be flagged as primary; otherwise the
            # column default (0) would store it as a secondary.
            self.categories.append(
                SubmissionCategory(submission_id=self.submission_id,
                                   category=primary_category,
                                   is_primary=1)
            )
        elif cur_primary is None and primary_category:
            self.categories.append(
                SubmissionCategory(
                    submission_id=self.submission_id,
                    category=primary_category,
                    is_primary=1
                )
            )

    def _update_secondaries(self, submission: domain.Submission) -> None:
        """Update secondary classifications."""
        cur_secondaries = [
            db_cat.category for db_cat
            in self.categories if db_cat.is_primary == 0
        ]
        tgt_secondaries = [
            cat.category for cat in submission.secondary_classification
        ]
        # Remove any categories that have been removed from the Submission.
        # Iterate over a snapshot: removing from the collection while
        # iterating it directly would skip the element after each removal.
        for db_cat in list(self.categories):
            if db_cat.is_primary == 1:
                continue
            if db_cat.category not in tgt_secondaries:
                self.categories.remove(db_cat)

        # Add any new secondaries
        for cat in submission.secondary_classification:
            if cat.category not in cur_secondaries:
                self.categories.append(
                    SubmissionCategory(
                        submission_id=self.submission_id,
                        category=cat.category,
                        is_primary=0
                    )
                )
- - -
class License(Base):    # type: ignore
    """
    Licenses available for submissions.

    Maps the ``arXiv_licenses`` table; ``name`` is the primary key.
    """

    __tablename__ = 'arXiv_licenses'

    name = Column(String(255), primary_key=True)
    """This is the URI of the license."""

    label = Column(String(255))
    """Display label for the license."""

    active = Column(Integer, server_default=text("'1'"))
    """Only offer licenses with active=1."""

    note = Column(String(255))
    sequence = Column(Integer)
- - -
class CategoryDef(Base):    # type: ignore
    """
    Classification categories available for submissions.

    Maps the ``arXiv_category_def`` table; ``category`` is the primary key.
    """

    __tablename__ = 'arXiv_category_def'

    category = Column(String(32), primary_key=True)
    name = Column(String(255))
    # Defaults to 1 on the server side.
    active = Column(Integer, server_default=text("'1'"))
- - -
class SubmissionCategory(Base):    # type: ignore
    """
    Classification relation for submissions.

    Maps the ``arXiv_submission_category`` table, whose composite primary
    key is (``submission_id``, ``category``).
    """

    __tablename__ = 'arXiv_submission_category'

    submission_id = Column(
        ForeignKey('arXiv_submissions.submission_id',
                   ondelete='CASCADE', onupdate='CASCADE'),
        primary_key=True,
        nullable=False,
        index=True
    )
    category = Column(
        ForeignKey('arXiv_category_def.category'),
        primary_key=True,
        nullable=False,
        index=True,
        server_default=text("''")
    )
    # 1 marks the primary classification, 0 a secondary; the server-side
    # default is 0.
    is_primary = Column(Integer, nullable=False, index=True,
                        server_default=text("'0'"))
    is_published = Column(Integer, index=True, server_default=text("'0'"))

    # category_def = relationship('CategoryDef')
    # Counterpart of ``Submission.categories``.
    submission = relationship('Submission', back_populates='categories')
- - -
class Document(Base):    # type: ignore
    """
    Represents a published arXiv paper.

    This is here so that we can look up the arXiv ID after a submission is
    published. Maps the ``arXiv_documents`` table.
    """

    __tablename__ = 'arXiv_documents'

    document_id = Column(Integer, primary_key=True)
    # Unique per document.
    paper_id = Column(String(20), nullable=False, unique=True,
                      server_default=text("''"))
    title = Column(String(255), nullable=False, index=True,
                   server_default=text("''"))
    authors = Column(Text)
    """Canonical author string."""

    dated = Column(Integer, nullable=False, index=True,
                   server_default=text("'0'"))

    primary_subject_class = Column(String(16))

    created = Column(DateTime)

    submitter_email = Column(String(64), nullable=False, index=True,
                             server_default=text("''"))
    submitter_id = Column(ForeignKey('tapir_users.user_id'), index=True)
    submitter = relationship('User')
- - -
class DocumentCategory(Base):    # type: ignore
    """
    Relation between published arXiv papers and their classifications.

    Maps the ``arXiv_document_category`` table, whose composite primary key
    is (``document_id``, ``category``).
    """

    __tablename__ = 'arXiv_document_category'

    document_id = Column(
        ForeignKey('arXiv_documents.document_id', ondelete='CASCADE'),
        primary_key=True,
        nullable=False,
        index=True,
        server_default=text("'0'")
    )
    category = Column(
        ForeignKey('arXiv_category_def.category'),
        primary_key=True,
        nullable=False,
        index=True
    )
    """E.g. cs.CG, cond-mat.dis-nn, etc."""
    is_primary = Column(Integer, nullable=False, server_default=text("'0'"))

    category_def = relationship('CategoryDef')
    document = relationship('Document')
- - -
class User(Base):    # type: ignore
    """
    Represents an arXiv user.

    Maps the ``tapir_users`` table; ``user_id`` is the primary key.
    """

    __tablename__ = 'tapir_users'

    user_id = Column(Integer, primary_key=True)
    first_name = Column(String(50), index=True)
    last_name = Column(String(50), index=True)
    suffix_name = Column(String(50))
    share_first_name = Column(Integer, nullable=False,
                              server_default=text("'1'"))
    share_last_name = Column(Integer, nullable=False,
                             server_default=text("'1'"))
    # Unique per user.
    email = Column(String(255), nullable=False, unique=True,
                   server_default=text("''"))
    share_email = Column(Integer, nullable=False, server_default=text("'8'"))
    email_bouncing = Column(Integer, nullable=False,
                            server_default=text("'0'"))
    policy_class = Column(ForeignKey('tapir_policy_classes.class_id'),
                          nullable=False, index=True,
                          server_default=text("'0'"))
    """
    +----------+---------------+
    | class_id | name          |
    +----------+---------------+
    |        1 | Administrator |
    |        2 | Public user   |
    |        3 | Legacy user   |
    +----------+---------------+
    """

    joined_date = Column(Integer, nullable=False, index=True,
                         server_default=text("'0'"))
    joined_ip_num = Column(String(16), index=True)
    joined_remote_host = Column(String(255), nullable=False,
                                server_default=text("''"))
    # Integer flag columns; all are non-nullable with server-side defaults.
    flag_internal = Column(Integer, nullable=False, index=True,
                           server_default=text("'0'"))
    flag_edit_users = Column(Integer, nullable=False, index=True,
                             server_default=text("'0'"))
    flag_edit_system = Column(Integer, nullable=False,
                              server_default=text("'0'"))
    flag_email_verified = Column(Integer, nullable=False,
                                 server_default=text("'0'"))
    flag_approved = Column(Integer, nullable=False, index=True,
                           server_default=text("'1'"))
    flag_deleted = Column(Integer, nullable=False, index=True,
                          server_default=text("'0'"))
    flag_banned = Column(Integer, nullable=False, index=True,
                         server_default=text("'0'"))
    flag_wants_email = Column(Integer, nullable=False,
                              server_default=text("'0'"))
    flag_html_email = Column(Integer, nullable=False,
                             server_default=text("'0'"))
    tracking_cookie = Column(String(255), nullable=False, index=True,
                             server_default=text("''"))
    flag_allow_tex_produced = Column(Integer, nullable=False,
                                     server_default=text("'0'"))

    tapir_policy_class = relationship('PolicyClass')
- - -# TODO: what is this? -
class PolicyClass(Base):    # type: ignore
    """
    Defines user roles in the system.

    Maps the ``tapir_policy_classes`` table, which ``User.policy_class``
    references via ``class_id``.
    """

    __tablename__ = 'tapir_policy_classes'

    class_id = Column(SmallInteger, primary_key=True)
    name = Column(String(64), nullable=False, server_default=text("''"))
    description = Column(Text, nullable=False)
    password_storage = Column(Integer, nullable=False,
                              server_default=text("'0'"))
    recovery_policy = Column(Integer, nullable=False,
                             server_default=text("'0'"))
    permanent_login = Column(Integer, nullable=False,
                             server_default=text("'0'"))
- - -
class Tracking(Base):    # type: ignore
    """
    Record of SWORD submissions.

    Maps the ``arXiv_tracking`` table; ``Submission.sword_id`` references
    the unique ``sword_id`` column.
    """

    __tablename__ = 'arXiv_tracking'

    tracking_id = Column(Integer, primary_key=True)
    sword_id = Column(Integer, nullable=False, unique=True,
                      server_default=text("'00000000'"))
    paper_id = Column(String(32), nullable=False)
    submission_errors = Column(Text)
    # Populated by the database (CURRENT_TIMESTAMP) when not supplied.
    timestamp = Column(DateTime, nullable=False,
                       server_default=text("CURRENT_TIMESTAMP"))
- - -
class ArchiveCategory(Base):    # type: ignore
    """
    Maps categories to the archives in which they reside.

    Maps the ``arXiv_archive_category`` table, whose composite primary key
    is (``archive_id``, ``category_id``).
    """

    __tablename__ = 'arXiv_archive_category'

    archive_id = Column(String(16), primary_key=True, nullable=False,
                        server_default=text("''"))
    category_id = Column(String(32), primary_key=True, nullable=False)
- - -
class ArchiveDef(Base):    # type: ignore
    """
    Defines the archives in the arXiv classification taxonomy.

    Maps the ``arXiv_archive_def`` table; ``archive`` is the primary key.
    """

    __tablename__ = 'arXiv_archive_def'

    archive = Column(String(16), primary_key=True, server_default=text("''"))
    name = Column(String(255))
- - -
class ArchiveGroup(Base):    # type: ignore
    """
    Maps archives to the groups in which they reside.

    Maps the ``arXiv_archive_group`` table, whose composite primary key is
    (``archive_id``, ``group_id``).
    """

    __tablename__ = 'arXiv_archive_group'

    archive_id = Column(String(16), primary_key=True, nullable=False,
                        server_default=text("''"))
    group_id = Column(String(16), primary_key=True, nullable=False,
                      server_default=text("''"))
- - -
class Archive(Base):    # type: ignore
    """
    Supplemental data about archives in the classification hierarchy.

    Maps the ``arXiv_archives`` table; ``archive_id`` is the primary key.
    """

    __tablename__ = 'arXiv_archives'

    archive_id = Column(String(16), primary_key=True,
                        server_default=text("''"))
    # Foreign key to the owning row in ``arXiv_groups``.
    in_group = Column(ForeignKey('arXiv_groups.group_id'), nullable=False,
                      index=True, server_default=text("''"))
    archive_name = Column(String(255), nullable=False,
                          server_default=text("''"))
    # Stored as four-character strings, not as date types.
    start_date = Column(String(4), nullable=False, server_default=text("''"))
    end_date = Column(String(4), nullable=False, server_default=text("''"))
    subdivided = Column(Integer, nullable=False, server_default=text("'0'"))

    arXiv_group = relationship('Group')
- - -
class GroupDef(Base):    # type: ignore
    """
    Defines the groups in the arXiv classification taxonomy.

    Maps the ``arXiv_group_def`` table; ``archive_group`` is the primary
    key.
    """

    __tablename__ = 'arXiv_group_def'

    archive_group = Column(String(16), primary_key=True,
                           server_default=text("''"))
    name = Column(String(255))
- - -
class Group(Base):    # type: ignore
    """
    Supplemental data about groups in the classification hierarchy.

    Maps the ``arXiv_groups`` table, which ``Archive.in_group`` references
    via ``group_id``.
    """

    __tablename__ = 'arXiv_groups'

    group_id = Column(String(16), primary_key=True, server_default=text("''"))
    group_name = Column(String(255), nullable=False, server_default=text("''"))
    # Stored as a four-character string.
    start_year = Column(String(4), nullable=False, server_default=text("''"))
- - -
class EndorsementDomain(Base):    # type: ignore
    """
    Endorsement configurations.

    Maps the ``arXiv_endorsement_domains`` table; ``endorsement_domain`` is
    the primary key.
    """

    __tablename__ = 'arXiv_endorsement_domains'

    endorsement_domain = Column(String(32), primary_key=True,
                                server_default=text("''"))
    # 'y'/'n' enum columns.
    endorse_all = Column(Enum('y', 'n'), nullable=False,
                         server_default=text("'n'"))
    mods_endorse_all = Column(Enum('y', 'n'), nullable=False,
                              server_default=text("'n'"))
    endorse_email = Column(Enum('y', 'n'), nullable=False,
                           server_default=text("'y'"))
    papers_to_endorse = Column(SmallInteger, nullable=False,
                               server_default=text("'4'"))
- - -
class Category(Base):    # type: ignore
    """
    Supplemental data about arXiv categories, including endorsement.

    Maps the ``arXiv_categories`` table, whose composite primary key is
    (``archive``, ``subject_class``).
    """

    __tablename__ = 'arXiv_categories'

    arXiv_endorsement_domain = relationship('EndorsementDomain')

    archive = Column(
        ForeignKey('arXiv_archives.archive_id'),
        primary_key=True,
        nullable=False,
        server_default=text("''")
    )
    """E.g. cond-mat, astro-ph, cs."""
    arXiv_archive = relationship('Archive')

    subject_class = Column(String(16), primary_key=True, nullable=False,
                           server_default=text("''"))
    """E.g. AI, spr-con, str-el, CO, EP."""

    definitive = Column(Integer, nullable=False, server_default=text("'0'"))
    active = Column(Integer, nullable=False, server_default=text("'0'"))
    """Only use rows where active == 1."""

    category_name = Column(String(255))
    # Unlike ``EndorsementDomain``, these enums admit a third value 'd',
    # which is also the server-side default.
    endorse_all = Column(
        Enum('y', 'n', 'd'),
        nullable=False,
        server_default=text("'d'")
    )
    endorse_email = Column(
        Enum('y', 'n', 'd'),
        nullable=False,
        server_default=text("'d'")
    )
    endorsement_domain = Column(
        ForeignKey('arXiv_endorsement_domains.endorsement_domain'),
        index=True
    )
    """E.g. astro-ph, acc-phys, chem-ph, cs."""

    papers_to_endorse = Column(SmallInteger, nullable=False,
                               server_default=text("'0'"))
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/classic/tests.html b/docs/_modules/events/services/classic/tests.html deleted file mode 100644 index ae30e1e..0000000 --- a/docs/_modules/events/services/classic/tests.html +++ /dev/null @@ -1,429 +0,0 @@ - - - - - - - events.services.classic.tests — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.classic.tests

-"""
-Integration tests for the classic database service.
-
-These tests assume that SQLAlchemy's MySQL backend is implemented correctly:
-instead of using a live MySQL database, they use an in-memory SQLite database.
-This is mostly fine (they are intended to be more-or-less swappable). The one
-iffy bit is the JSON datatype, which is not available by default in the SQLite
-backend, and so we inject a simple one here. End to end tests with a live MySQL
-database will provide more confidence in this area.
-"""
-
-from unittest import TestCase, mock
-import os
-from datetime import datetime
-from contextlib import contextmanager
-import json
-
-from flask import Flask
-
-from events.domain.agent import User
-from events.domain.submission import License, Submission, Author
-from events.domain.event import CreateSubmission, UpdateMetadata, \
-    FinalizeSubmission, SetPrimaryClassification, AddSecondaryClassification, \
-    SelectLicense, SetPrimaryClassification, AcceptPolicy, \
-    VerifyContactInformation
-from events.domain.agent import User
-from events.services import classic
-
-
-
@contextmanager
def in_memory_db():
    """
    Yield a session bound to a throwaway in-memory SQLite database.

    Tables are created on entry and dropped again on exit, whether or not
    the managed block raised.
    """
    test_app = Flask('foo')
    test_app.config['SQLALCHEMY_DATABASE_URI'] = 'sqlite://'
    test_app.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False

    with test_app.app_context():
        classic.init_app(test_app)
        classic.create_all()
        try:
            session = classic.current_session()
            yield session
        finally:
            # Exceptions from the managed block propagate unchanged.
            classic.drop_all()
- - -
class TestGetLicenses(TestCase):
    """Test :func:`.classic.get_licenses`."""

    def test_get_all_active_licenses(self):
        """Return a :class:`.License` for each active license in the db."""
        inactive_row = classic.models.License(
            name="http://arxiv.org/licenses/assumed-1991-2003",
            sequence=9,
            label="Assumed arXiv.org perpetual, non-exclusive license to",
            active=0
        )
        active_row = classic.models.License(
            name="http://creativecommons.org/licenses/publicdomain/",
            sequence=4,
            label="Creative Commons Public Domain Declaration",
            active=1
        )

        with in_memory_db() as session:
            for row in (inactive_row, active_row):
                session.add(row)
            session.commit()
            licenses = classic.get_licenses()

            self.assertEqual(len(licenses), 1,
                             "Only the active license should be returned.")
            only_license = licenses[0]
            self.assertIsInstance(only_license, License,
                                  "Should return License instances.")
            self.assertEqual(
                only_license.uri,
                "http://creativecommons.org/licenses/publicdomain/",
                "Should use name column to populate License.uri"
            )
            self.assertEqual(
                only_license.name,
                "Creative Commons Public Domain Declaration",
                "Should use label column to populate License.name"
            )
- - -
class TestStoreEvents(TestCase):
    """Test :func:`.classic.store_events`."""

    def test_store_event(self):
        """Store a single event."""
        with in_memory_db() as session:
            creator = User(12345, 'joe@joe.joe')
            creation = CreateSubmission(creator=creator)
            submission = classic.store_events(creation,
                                              submission=creation.apply())

            row = session.query(classic.models.Submission)\
                .get(submission.submission_id)

            self.assertEqual(row.submission_id, submission.submission_id,
                             "The submission should be updated with the PK id.")
            self.assertEqual(row.submitter_id,
                             submission.creator.native_id,
                             "The native ID of the creator should be used")
            self.assertEqual(row.status, row.NOT_SUBMITTED,
                             "Submission in database should be in status 0 (not"
                             " submitted) by default.")

    def test_store_events_with_metadata(self):
        """Store events and attendant submission with metadata."""
        metadata = {
            'title': 'foo title',
            'abstract': 'very abstract',
            'comments': 'indeed',
            'msc_class': 'foo msc',
            'acm_class': 'computer-y',
            'doi': '10.01234/5678',
            'journal_ref': 'Nature 1: 1',
            'authors': [Author(order=0, forename='Joe', surname='Bloggs')]
        }
        with in_memory_db() as session:
            creator = User(12345, 'joe@joe.joe')
            creation = CreateSubmission(creator=creator)
            update = UpdateMetadata(creator=creator,
                                    metadata=list(metadata.items()))

            submission = update.apply(creation.apply())
            submission = classic.store_events(creation, update,
                                              submission=submission)

            row = session.query(classic.models.Submission)\
                .get(submission.submission_id)
            stored_events = session.query(classic.DBEvent).all()

            # Authors are stored in canonical string form and are checked
            # separately below.
            for key, value in metadata.items():
                if key != 'authors':
                    self.assertEqual(getattr(row, key), value,
                                     f"The value of {key} should be {value}")
            self.assertEqual(row.authors,
                             submission.metadata.authors_canonical,
                             "The canonical author string should be used to"
                             " update the submission in the database.")

            self.assertEqual(len(stored_events), 2,
                             "Two events should be stored")
            for stored in stored_events:
                self.assertEqual(stored.submission_id,
                                 submission.submission_id,
                                 "The submission id should be set")

    def test_store_events_with_finalized_submission(self):
        """Store events and a finalized submission."""
        with in_memory_db() as session:
            creator = User(12345, 'joe@joe.joe')
            creation = CreateSubmission(creator=creator)
            finalization = FinalizeSubmission(creator=creator)
            submission = finalization.apply(creation.apply())
            submission = classic.store_events(creation, finalization,
                                              submission=submission)

            row = session.query(classic.models.Submission)\
                .get(submission.submission_id)
            stored_events = session.query(classic.DBEvent).all()

            self.assertEqual(row.submission_id, submission.submission_id,
                             "The submission should be updated with the PK id.")
            self.assertEqual(len(stored_events), 2,
                             "Two events should be stored")
            for stored in stored_events:
                self.assertEqual(stored.submission_id,
                                 submission.submission_id,
                                 "The submission id should be set")

    def test_store_events_with_classification(self):
        """Store events including classification."""
        creator = User(12345, 'joe@joe.joe')
        creation = CreateSubmission(creator=creator)
        set_primary = SetPrimaryClassification(creator=creator,
                                               category='physics.soc-ph')
        add_secondary = AddSecondaryClassification(creator=creator,
                                                   category='physics.acc-ph')
        submission = creation.apply()
        submission = set_primary.apply(submission)
        submission = add_secondary.apply(submission)

        with in_memory_db() as session:
            submission = classic.store_events(creation, set_primary,
                                              add_secondary,
                                              submission=submission)

            row = session.query(classic.models.Submission)\
                .get(submission.submission_id)
            stored_events = session.query(classic.DBEvent).all()

            self.assertEqual(row.submission_id, submission.submission_id,
                             "The submission should be updated with the PK id.")
            self.assertEqual(len(stored_events), 3,
                             "Three events should be stored")
            for stored in stored_events:
                self.assertEqual(stored.submission_id,
                                 submission.submission_id,
                                 "The submission id should be set")
            self.assertEqual(len(row.categories), 2,
                             "Two category relations should be set")
            self.assertEqual(row.primary_classification.category,
                             submission.primary_classification.category,
                             "Primary classification should be set.")
- - -
class TestGetSubmission(TestCase):
    """Test :func:`.classic.get_submission`."""

    @staticmethod
    def _finalized_submission():
        """Build the shared event stack and the submission it produces."""
        user = User(12345, 'joe@joe.joe')
        events = [
            CreateSubmission(creator=user),
            UpdateMetadata(creator=user, metadata=[
                ('title', 'Foo title'),
                ('abstract', 'Indeed'),
                ('authors', [
                    Author(order=0, forename='Joe', surname='Bloggs',
                           email='joe@blo.ggs'),
                    Author(order=1, forename='Jane', surname='Doe',
                           email='j@doe.com'),
                ])
            ]),
            SelectLicense(creator=user, license_uri='http://foo.org/1.0/',
                          license_name='Foo zero 1.0'),
            SetPrimaryClassification(creator=user, category='cs.DL'),
            AcceptPolicy(creator=user),
            VerifyContactInformation(creator=user),
            FinalizeSubmission(creator=user)
        ]
        submission = None
        for event in events:
            if submission:
                submission = event.apply(submission)
            else:
                submission = event.apply()
        return events, submission

    def test_get_submission_that_does_not_exist(self):
        """Test that an exception is raised when submission doesn't exist."""
        with in_memory_db():
            with self.assertRaises(classic.exceptions.NoSuchSubmission):
                classic.get_submission(1)

    def test_get_submission_with_publish(self):
        """Test that publication state is reflected in submission data."""
        events, submission = self._finalized_submission()

        with in_memory_db() as session:
            # User creates and finalizes submission.
            submission = classic.store_events(*events, submission=submission)
            ident = submission.submission_id

            # Moderation happens, things change outside the event model.
            row = session.query(classic.models.Submission).get(ident)

            # Published!
            row.status = row.PUBLISHED
            published_doc = classic.models.Document(paper_id='1234.5678')
            row.document = published_doc
            session.add(row)
            session.add(published_doc)
            session.commit()

            # Now get the submission.
            submission_loaded, _ = classic.get_submission(ident)

            self.assertEqual(submission.metadata.title,
                             submission_loaded.metadata.title,
                             "Event-derived metadata should be preserved.")
            self.assertEqual(submission_loaded.arxiv_id, "1234.5678",
                             "arXiv paper ID should be set")
            self.assertEqual(submission_loaded.status, Submission.PUBLISHED,
                             "Submission status should reflect publish action")

    def test_get_submission_with_hold_and_reclass(self):
        """Test changes made externally are reflected in submission data."""
        events, submission = self._finalized_submission()

        with in_memory_db() as session:
            # User creates and finalizes submission.
            submission = classic.store_events(*events, submission=submission)
            ident = submission.submission_id

            # Moderation happens, things change outside the event model.
            row = session.query(classic.models.Submission).get(ident)

            # Reclassification!
            session.delete(row.primary_classification)
            session.add(classic.models.SubmissionCategory(
                submission_id=ident, category='cs.IR', is_primary=1
            ))

            # On hold!
            row.status = row.ON_HOLD
            session.add(row)
            session.commit()

            # Now get the submission.
            submission_loaded, _ = classic.get_submission(ident)

            self.assertEqual(submission.metadata.title,
                             submission_loaded.metadata.title,
                             "Event-derived metadata should be preserved.")
            self.assertEqual(submission_loaded.primary_classification.category,
                             "cs.IR",
                             "Primary classification should reflect the"
                             " reclassification that occurred outside the purview"
                             " of the event model.")
            self.assertEqual(submission_loaded.status, Submission.ON_HOLD,
                             "Submission status should reflect hold action"
                             " performed outside the purview of the event model.")
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/classic/util.html b/docs/_modules/events/services/classic/util.html deleted file mode 100644 index dbd787b..0000000 --- a/docs/_modules/events/services/classic/util.html +++ /dev/null @@ -1,135 +0,0 @@ - - - - - - - events.services.classic.util — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.classic.util

-"""Utility classes and functions for :mod:`events.services.classic`."""
-
-import json
-from typing import Optional
-import sqlalchemy.types as types
-
-
-
[docs]class SQLiteJSON(types.TypeDecorator): - """A SQLite-friendly JSON data type.""" - - impl = types.TEXT - -
[docs] def process_bind_param(self, value: Optional[dict], dialect: str) -> str: - """Serialize a dict to JSON.""" - if value is not None: - value = json.dumps(value) - return value
- -
[docs] def process_result_value(self, value: str, dialect: str) -> Optional[dict]: - """Deserialize JSON content to a dict.""" - if value is not None: - value = json.loads(value) - return value
- - -# SQLite does not support JSON, so we extend JSON to use our custom data type -# as a variant for the 'sqlite' dialect. -FriendlyJSON = types.JSON().with_variant(SQLiteJSON, 'sqlite') -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/services/notification.html b/docs/_modules/events/services/notification.html deleted file mode 100644 index 93317a8..0000000 --- a/docs/_modules/events/services/notification.html +++ /dev/null @@ -1,130 +0,0 @@ - - - - - - - events.services.notification — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.services.notification

-"""
-The notication service provides integration with the notification broker.
-
-Brokered notifications provide an event-based integration strategy for other
-arXiv submission and moderation services, including the webhook service (to
-notify external-to-arXiv services).
-"""
-
-from events.domain import Event, Submission
-
-
-
[docs]def emit(event: Event, submission: Submission) -> None: - """ - Emit an event to the notification broker. - - Parameters - ---------- - event : :class:`.Event` - submission : :class:`.Submission` - - Raises - ------ - NotificationFailed - """
- # TODO: implement me! -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/events/tests.html b/docs/_modules/events/tests.html deleted file mode 100644 index ff82232..0000000 --- a/docs/_modules/events/tests.html +++ /dev/null @@ -1,306 +0,0 @@ - - - - - - - events.tests — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for events.tests

-"""Tests for :mod:`events` public API."""
-
-from unittest import TestCase, mock
-import os
-from collections import defaultdict
-from datetime import datetime, timedelta
-from flask import Flask
-from events import save, load, Submission, User, Event, UpdateMetadata, \
-    EventRule, RuleCondition, RuleConsequence, CreateComment, \
-    SubmissionMetadata, CreateSubmission, UpdateAuthors, Author
-from events.exceptions import NoSuchSubmission, InvalidEvent
-from events.services import classic
-
-
-
[docs]def mock_store_events(*events, submission): - """Mock for :func:`events.services.database.store_events`.""" - if submission.submission_id is None: - submission.submission_id = 1 - for event in events: - event.committed = True - event.submission_id = submission.submission_id - return submission
- - -
[docs]class TestLoad(TestCase): - """Test :func:`.load`.""" - -
[docs] @mock.patch('events.classic') - def test_load_existant_submission(self, mock_classic): - """When the submission exists, submission and events are returned.""" - u = User(12345, 'joe@joe.joe') - mock_classic.get_submission.return_value = ( - Submission(creator=u, submission_id=1, owner=u, - created=datetime.now()), - [CreateSubmission(creator=u, submission_id=1, committed=True)] - ) - submission, events = load(1) - self.assertEqual(mock_classic.get_submission.call_count, 1) - self.assertIsInstance(submission, Submission, - "A submission should be returned") - self.assertIsInstance(events, list, - "A list of events should be returned") - self.assertIsInstance(events[0], Event, - "A list of events should be returned")
- -
[docs] @mock.patch('events.classic') - def test_load_nonexistant_submission(self, mock_classic): - """When the submission does not exist, an exception is raised.""" - mock_classic.get_submission.side_effect = classic.NoSuchSubmission - mock_classic.NoSuchSubmission = classic.NoSuchSubmission - with self.assertRaises(NoSuchSubmission): - load(1)
- - -
[docs]class TestSave(TestCase): - """Test :func:`.save`.""" - -
[docs] @mock.patch('events.classic') - def test_save_creation_event(self, mock_database): - """A :class:`.CreationEvent` is passed.""" - mock_database.store_events = mock_store_events - user = User(12345, 'joe@joe.joe') - event = CreateSubmission(creator=user) - submission, events = save(event) - self.assertIsInstance(submission, Submission, - "A submission instance should be returned") - self.assertIsInstance(events[0], Event, - "Should return a list of events") - self.assertEqual(events[0], event, - "The first event should be the event that was passed") - self.assertIsNotNone(submission.submission_id, - "Submission ID should be set.")
- -
[docs] @mock.patch('events.classic') - def test_save_events_from_scratch(self, mock_database): - """Save multiple events for a nonexistant submission.""" - mock_database.store_events = mock_store_events - user = User(12345, 'joe@joe.joe') - e = CreateSubmission(creator=user) - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) - submission, events = save(e, e2) - - self.assertEqual(submission.metadata.title, 'foo') - self.assertIsInstance(submission.submission_id, int) - self.assertEqual(submission.created, e.created)
- -
[docs] @mock.patch('events.classic') - def test_create_and_update_authors(self, mock_database): - """Save multiple events for a nonexistant submission.""" - mock_database.store_events = mock_store_events - user = User(12345, 'joe@joe.joe') - e = CreateSubmission(creator=user) - e2 = UpdateAuthors(creator=user, authors=[ - Author(0, forename='Joe', surname="Bloggs", email="joe@blog.gs") - ]) - submission, events = save(e, e2) - self.assertIsInstance(submission.metadata.authors[0], Author)
- -
[docs] @mock.patch('events.classic') - def test_save_from_scratch_without_creation_event(self, mock_database): - """An exception is raised when there is no creation event.""" - mock_database.store_events = mock_store_events - user = User(12345, 'joe@joe.joe') - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) - with self.assertRaises(NoSuchSubmission): - save(e2)
- -
[docs] @mock.patch('events.classic') - def test_save_invalid_event(self, mock_db): - """An exception is raised when an invalid event is encountered.""" - mock_db.get_events.return_value = [] - - class EventMock(CreateSubmission): - def valid(self, *args, **kwargs): - return False - - user = User(12345, 'joe@joe.joe') - e = EventMock(creator=user, submission_id=1) - with self.assertRaises(InvalidEvent): - save(e, submission_id=1)
- -
[docs] @mock.patch('events.classic') - def test_save_events_on_existing_submission(self, mock_db): - """Save multiple sets of events in separate calls to :func:`.save`.""" - cache = defaultdict(list) - - def mock_store_events_with_cache(*events, submission): - if submission.submission_id is None: - submission.submission_id = 1 - for event in events: - event.committed = True - event.submission_id = submission.submission_id - cache[event.submission_id].append(event) - return submission - - def mock_get_events(submission_id): - return cache[submission_id] - - mock_db.store_events = mock_store_events_with_cache - mock_db.get_events = mock_get_events - - # Here is the first set of events. - user = User(12345, 'joe@joe.joe') - e = CreateSubmission(creator=user) - e2 = UpdateMetadata(creator=user, metadata=[['title', 'foo']]) - submission, _ = save(e, e2) - submission_id = submission.submission_id - - # Now we apply a second set of events. - e3 = UpdateMetadata(creator=user, metadata=[['abstract', 'bar']]) - submission2, _ = save(e3, submission_id=submission_id) - - # The submission state reflects all three events. - self.assertEqual(submission2.metadata.abstract, 'bar', - "State of the submission should reflect both sets" - " of events.") - self.assertEqual(submission2.metadata.title, 'foo', - "State of the submission should reflect both sets" - " of events.") - self.assertEqual(submission2.created, e.created, - "The creation date of the submission should be the" - " original creation date.") - self.assertEqual(submission2.submission_id, submission_id, - "The submission ID should remain the same.")
- - # TODO: restore this when rules are implemented. - # - # @mock.patch('events.classic') - # def test_apply_events_with_rules(self, mock_db): - # """Save a set of events for which some rules apply.""" - # # Given the following rule... - # def mock_get_rules_for_submission(submission_id): - # return [ - # # If the metadata of any submission was updated, add a comment. - # EventRule( - # rule_id=1, - # creator=User('foo'), - # condition=RuleCondition( - # event_type=UpdateMetadata, - # extra_condition={} - # ), - # consequence=RuleConsequence( - # event_creator=User('foo'), - # event_type=CreateCommentEvent, - # event_data={ - # 'body': 'The metadata was updated', - # 'scope': 'private' - # } - # ) - # ) - # ] - # mock_db.get_rules = mock_get_rules_for_submission - # mock_db.store_events = mock_store_events - # e = CreateSubmission(creator=User('foo')) - # e2 = UpdateMetadata(creator=User('foo'), - # metadata=[['title', 'foo']]) - # submission, events = save(e, e2) - # self.assertEqual(len(submission.comments), 1, - # "A comment should be added to the submission.") - # self.assertEqual(len(events), 3, - # "A third event is added to the stack.") -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/index.html b/docs/_modules/index.html deleted file mode 100644 index 32b6ef4..0000000 --- a/docs/_modules/index.html +++ /dev/null @@ -1,132 +0,0 @@ - - - - - - - Overview: module code — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - - - - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/controllers/submission.html b/docs/_modules/metadata/controllers/submission.html deleted file mode 100644 index ce7d86d..0000000 --- a/docs/_modules/metadata/controllers/submission.html +++ /dev/null @@ -1,235 +0,0 @@ - - - - - - - metadata.controllers.submission — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.controllers.submission

-"""Controllers for the metadata API."""
-
-import json
-from functools import wraps
-from datetime import datetime
-import copy
-from arxiv.base import logging
-from typing import Tuple, List, Callable, Optional
-
-from flask import url_for, current_app
-from werkzeug.exceptions import NotFound, BadRequest, InternalServerError
-
-from arxiv import status
-from events.domain.agent import Agent, agent_factory, System
-from events.domain import Event
-from events.domain.submission import Submission, Classification, License, \
-    SubmissionMetadata
-import events as ev
-
-from metadata.controllers import util
-from . import handlers
-
-logger = logging.getLogger(__name__)
-
-
-Response = Tuple[dict, int, dict]
-
-
-def _get_agents(headers: dict, user_data: dict, client_data: dict) \
-        -> Tuple[Agent, Agent, Optional[Agent]]:
-    user = ev.User(
-        native_id=user_data['user_id'],
-        email=user_data['email']
-    )
-    client = ev.Client(native_id=client_data['client_id'])
-    on_behalf_of = headers.get('X-On-Behalf-Of')
-    if on_behalf_of is not None:
-        proxy = user
-        user = ev.User(on_behalf_of, '', '')
-    else:
-        proxy = None
-    return user, client, proxy
-
-
-
[docs]@util.validate_request('schema/resources/submission.json') -def create_submission(data: dict, headers: dict, user_data: dict, - client_data: dict, token: str) -> Response: - """ - Create a new submission. - - Implements the hook for :meth:`sword.SWORDCollection.add_submission`. - - Parameters - ---------- - data : dict - Deserialized compact JSON-LD document. - headers : dict - Request headers from the client. - - Returns - ------- - dict - Response data. - int - HTTP status code. - dict - Headers to add to the response. - """ - logger.debug('Received request to create submission') - user, client, proxy = _get_agents(headers, user_data, client_data) - logger.debug(f'User: {user}; client: {client}, proxy: {proxy}') - agents = dict(creator=user, client=client, proxy=proxy) - create = ev.CreateSubmission(creator=user, client=client, proxy=proxy) - events = handlers.handle_submission(data, agents) - try: - submission, events = ev.save(create, *events) - except ev.InvalidEvent as e: - raise InternalServerError(str(e)) from e - except ev.SaveError as e: - logger.error('Problem interacting with database: (%s) %s', - str(type(e)), str(e)) - raise InternalServerError('Problem interacting with database') from e - except Exception as e: - logger.error('Unhandled exception: (%s) %s', str(type(e)), str(e)) - raise InternalServerError('Encountered unhandled exception') from e - - response_headers = { - 'Location': url_for('submission.get_submission', - submission_id=submission.submission_id) - } - return submission.to_dict(), status.HTTP_201_CREATED, response_headers
- - -
[docs]def get_submission(submission_id: str, user: Optional[str] = None, - client: Optional[str] = None, - token: Optional[str] = None) -> Response: - """Retrieve the current state of a submission.""" - try: - submission, events = ev.load(submission_id) - except ev.NoSuchSubmission as e: - raise NotFound('Submission not found') from e - except Exception as e: - logger.error('Unhandled exception: (%s) %s', str(type(e)), str(e)) - raise InternalServerError('Encountered unhandled exception') from e - return submission.to_dict(), status.HTTP_200_OK, {}
- - -
[docs]@util.validate_request('schema/resources/submission.json') -def update_submission(data: dict, headers: dict, user_data: dict, - client_data: dict, token: str, submission_id: str) \ - -> Response: - """Update the submission.""" - user, client, proxy = _get_agents(headers, user_data, client_data) - agents = dict(creator=user, client=client, proxy=proxy) - events = handlers.handle_submission(data, agents) - try: - submission, events = ev.save(*events, submission_id=submission_id) - except ev.NoSuchSubmission as e: - raise NotFound(f"No submission found with id {submission_id}") - except ev.InvalidEvent as e: - raise InternalServerError(str(e)) from e - except ev.SaveError as e: - raise InternalServerError('Problem interacting with database') from e - except Exception as e: - logger.error('Unhandled exception: (%s) %s', str(type(e)), str(e)) - raise InternalServerError('Encountered unhandled exception') from e - - response_headers = { - 'Location': url_for('submit.get_submission', creator=user, - submission_id=submission.submission_id) - } - return submission.to_dict(), status.HTTP_200_OK, response_headers
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/controllers/submission/handlers.html b/docs/_modules/metadata/controllers/submission/handlers.html deleted file mode 100644 index 2a98fc5..0000000 --- a/docs/_modules/metadata/controllers/submission/handlers.html +++ /dev/null @@ -1,314 +0,0 @@ - - - - - - - metadata.controllers.submission.handlers — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.controllers.submission.handlers

-"""
-Data handling logic for submission data payload.
-
-Each handler (a function) should accept two parameters:
-
-- ``data`` is the value of the field that it handles. This can be anything that
-  is deserializable from a JSON document.
-- ``agents`` is a dict with the ``creator``, ``proxy``, and ``client`` agents
-  to use when creating new events.
-
-The primary controller in this module is the :func:`.handle_submission`, which
-delegates work to the handlers. The global ``HANDLERS`` defined at the end of
-this module describes how delegation should occur.
-
-Note: data validation should not be implemented here! Events/commands in
-:mod:`events` should define required parameters, perform all validation,
-and carry out any required transformation/cleanup.
-"""
-from typing import Tuple, Optional, Dict, Callable, List
-
-import events
-
-
-
[docs]def handle_submission(data: dict, agents: dict) -> Tuple[events.Event]: - """ - Handle the submission payload. - - We assume that schema validation has been performed already, so it's not - up to us to verify the shape of the data. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - _events = [] - for key_path, handler in HANDLERS: - value = data - for key in key_path: - if key not in value: - value = None - break - value = value[key] - if value is None: - continue - _events += handler(value, agents) - return tuple(_events)
- - -
[docs]def handle_submitter_is_author(data: bool, agents: dict) \ - -> Tuple[events.Event]: - """ - Handle the ``submitter_is_author`` field in submission payload. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - return events.AssertAuthorship(**agents, submitter_is_author=data),
- - -
[docs]def handle_license(data: dict, agents: dict) -> Tuple[events.Event]: - """Handle the ``license`` field in submission payload.""" - return events.SelectLicense( - **agents, - license_name=data.get('name', ''), - license_uri=data['uri'] - ),
- - -
[docs]def handle_submitter_accepts_policy(data: dict, agents: dict) \ - -> Tuple[events.Event]: - """ - Handle the ``submitter_accepts_policy`` field in submission payload. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - if data: - return events.AcceptPolicy(**agents), - return tuple()
- - -
[docs]def handle_primary_classification(data: dict, agents: dict) \ - -> Optional[Tuple[events.Event]]: - """Handle the ``primary_classification`` field in submission payload.""" - return events.SetPrimaryClassification( - **agents, - category=data['category'] - ),
- - -
[docs]def handle_secondary_classification(data: list, agents: dict) \ - -> Tuple[events.Event]: - """ - Handle the ``secondary_classification`` field in submission payload. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - return tuple([ - events.AddSecondaryClassification(**agents, category=clsn['category']) - for clsn in data - ])
- - -
[docs]def handle_metadata(data: dict, agents: dict) -> Tuple[events.Event]: - """ - Handle the ``metadata`` field in the submission payload. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - # Most of this could be in a list comprehension, but it may help to - # keep this verbose in case we want to intervene on values. - _metadata = [] - for key in events.UpdateMetadata.FIELDS: - if key not in data: - continue - _metadata.append((key, data[key])) - if not _metadata: - return tuple() - return events.UpdateMetadata(**agents, metadata=_metadata),
- - -
[docs]def handle_authors(data: dict, agents: dict) -> Tuple[events.Event]: - """ - Handle authors in the submission payload. - - Parameters - ---------- - data : dict - agents : dict - Values are :class:`events.Agent` instances. - - Returns - ------- - tuple - Zero or more uncommitted :class:`events.Event` instances. - """ - if not data: - return tuple() - _authors = [] - for i, au in enumerate(data): - if 'order' not in au: - au['order'] = i - _authors.append(events.Author(**au)) - return events.UpdateAuthors(**agents, authors=_authors),
- - -HANDLERS: List[Tuple[Tuple[str], Callable]] = [ - (('submitter_is_author', ), handle_submitter_is_author), - (('license', ), handle_license), - (('submitter_accepts_policy', ), handle_submitter_accepts_policy), - (('primary_classification', ), handle_primary_classification), - (('secondary_classification', ), handle_secondary_classification), - (('metadata', ), handle_metadata), - (('metadata', 'authors'), handle_authors) -] -""" -Describes how data in the payload should be handled. - -Each item is a two-tuple, defining the key-path to some data in the -submission payload and the handler function that should be applied. - -A key-path is a tuple of keys to be applied recursively to access the data. -E.g. the key-path ``('metadata', 'authors')`` will access -``payload['metadata']['authors']`` and pass the referent to the corresponding -handler. - -Extra data in the payload is simply ignored. -""" -
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/controllers/submission/tests.html b/docs/_modules/metadata/controllers/submission/tests.html deleted file mode 100644 index 86717a3..0000000 --- a/docs/_modules/metadata/controllers/submission/tests.html +++ /dev/null @@ -1,361 +0,0 @@ - - - - - - - metadata.controllers.submission.tests — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.controllers.submission.tests

-"""Tests for :mod:`api.controllers`."""
-
-from unittest import TestCase, mock
-import json
-from datetime import datetime
-from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
-
-from arxiv import status
-from events.domain import User, Submission, Author
-from events import CreateSubmission, UpdateMetadata, SaveError, \
-    InvalidEvent, NoSuchSubmission, SetPrimaryClassification, \
-    AttachSourceContent, UpdateAuthors
-from metadata.controllers import submission
-
-
-
[docs]def preserve_exceptions_and_events(mock_events): - """Add real exceptions back to the mock.""" - mock_events.SaveError = SaveError - mock_events.InvalidEvent = InvalidEvent - mock_events.NoSuchSubmission = NoSuchSubmission - mock_events.UpdateMetadata = UpdateMetadata - mock_events.UpdateAuthors = UpdateAuthors - mock_events.Author = Author - mock_events.CreateSubmission = CreateSubmission - mock_events.SetPrimaryClassification = SetPrimaryClassification - mock_events.AttachSourceContent = AttachSourceContent
- - -
[docs]class TestCreateSubmission(TestCase): - """Tests for :func:`.submission.create_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_valid_data(self, mock_events, url_for): - """Create a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user)] - ) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - resp, stat, head = submission.create_submission(data, self.headers, - self.user_data, - self.client_data, - self.token) - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], CreateSubmission, - "Should pass a CreateSubmission first") - self.assertIsInstance(call_args[1], SetPrimaryClassification, - "Should pass a SetPrimaryClassification") - self.assertEqual(stat, status.HTTP_201_CREATED, - "Should return 201 Created when submission is" - " successfully created.") - self.assertIn('Location', head, "Should include a Location header.")
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_invalid_data(self, mock_events, url_for): - """Trying to create a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'metadata': 'bad value', - } - with self.assertRaises(BadRequest): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = InvalidEvent - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- - -
[docs]class TestUpdateSubmission(TestCase): - """Tests for :func:`.submission.update_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_valid_data(self, mock_events, url_for): - """Update a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user), - UpdateMetadata(creator=user, metadata=[('title', 'foo title')])] - ) - data = { - 'metadata': { - 'title': 'foo title', - 'authors': [ - { - 'forename': 'Jane', - 'surname': 'Doe', - 'email': 'jane@doe.com' - } - ] - } - } - resp, stat, head = submission.update_submission(data, self.headers, - self.user_data, - self.client_data, - self.token, 1) - self.assertEqual(stat, status.HTTP_200_OK, - "Should return 200 OK when submission is" - " successfully updated.") - self.assertIn('Location', head, "Should include a Location header.") - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], UpdateMetadata, - "Should pass an UpdateMetadata") - self.assertIsInstance(call_args[1], UpdateAuthors, - "Should pass an UpdateAuthors")
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_nonexistant_submission(self, mock_events, url_for): - """Trying to update a nonexistant submission throws exception.""" - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = NoSuchSubmission - url_for.return_value = '/foo/' - data = { - 'metadata': { - 'title': 'foo title' - } - } - with self.assertRaises(NotFound): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_invalid_data(self, mock_events, url_for): - """Trying to update a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'metadata': 'bad value', - } - with self.assertRaises(BadRequest): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = InvalidEvent - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- - -
[docs]class TestGetSubmission(TestCase): - """Tests for :func:`.submission.get_submission`.""" - -
[docs] @mock.patch('metadata.controllers.submission.ev') - def test_get_submission(self, mock_events): - """Should return a JSON-serializable dict if submisison exists.""" - preserve_exceptions_and_events(mock_events) - user = User(1234, 'foo@bar.baz') - mock_events.load.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user)] - ) - content, status_code, headers = submission.get_submission(1) - self.assertEqual(mock_events.load.call_count, 1, - "Should call load() in the events core package") - self.assertEqual(status_code, status.HTTP_200_OK, - "Should return 200 OK") - self.assertIsInstance(content, dict, "Should return a dict") - try: - json.dumps(content) - except Exception: - self.fail("Content should be JSON-serializable.")
- -
[docs] @mock.patch('metadata.controllers.submission.ev') - def test_get_nonexistant_submission(self, mock_events): - """Should raise NotFound if the submission does not exist.""" - preserve_exceptions_and_events(mock_events) - mock_events.load.side_effect = NoSuchSubmission - with self.assertRaises(NotFound): - submission.get_submission(1)
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/controllers/tests.html b/docs/_modules/metadata/controllers/tests.html deleted file mode 100644 index c36a5d8..0000000 --- a/docs/_modules/metadata/controllers/tests.html +++ /dev/null @@ -1,359 +0,0 @@ - - - - - - - metadata.controllers.tests — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.controllers.tests

-"""Tests for :mod:`api.controllers`."""
-
-from unittest import TestCase, mock
-import json
-from datetime import datetime
-from werkzeug.exceptions import BadRequest, InternalServerError, NotFound
-
-from arxiv import status
-from events.domain import User, Submission, Author
-from events import CreateSubmission, UpdateMetadata, SaveError, \
-    InvalidEvent, NoSuchSubmission, SetPrimaryClassification, \
-    AttachSourceContent, UpdateAuthors
-from metadata.controllers import submission
-
-
-
[docs]def preserve_exceptions_and_events(mock_events): - """Add real exceptions back to the mock.""" - mock_events.SaveError = SaveError - mock_events.InvalidEvent = InvalidEvent - mock_events.NoSuchSubmission = NoSuchSubmission - mock_events.UpdateMetadata = UpdateMetadata - mock_events.UpdateAuthors = UpdateAuthors - mock_events.Author = Author - mock_events.CreateSubmission = CreateSubmission - mock_events.SetPrimaryClassification = SetPrimaryClassification - mock_events.AttachSourceContent = AttachSourceContent
- - -
[docs]class TestCreateSubmission(TestCase): - """Tests for :func:`.submission.create_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_valid_data(self, mock_events, url_for): - """Create a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user)] - ) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - resp, stat, head = submission.create_submission(data, self.headers, - self.user_data, - self.client_data, - self.token) - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], CreateSubmission, - "Should pass a CreateSubmission first") - self.assertIsInstance(call_args[1], SetPrimaryClassification, - "Should pass a SetPrimaryClassification") - self.assertEqual(stat, status.HTTP_201_CREATED, - "Should return 201 Created when submission is" - " successfully created.") - self.assertIn('Location', head, "Should include a Location header.")
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_invalid_data(self, mock_events, url_for): - """Trying to create a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'metadata': 'bad value', - } - with self.assertRaises(BadRequest): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_create_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = InvalidEvent - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.create_submission(data, self.headers, self.user_data, - self.client_data, self.token)
- - -
[docs]class TestUpdateSubmission(TestCase): - """Tests for :func:`.submission.update_submission`.""" - -
[docs] def setUp(self): - """Create some fake request data.""" - self.user_data = {'user_id': 1234, 'email': 'foo@bar.baz'} - self.client_data = {'client_id': 5678} - self.token = 'asdf1234' - self.headers = {}
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_valid_data(self, mock_events, url_for): - """Update a submission with valid data.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - user = User(1234, 'foo@bar.baz') - mock_events.save.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user), - UpdateMetadata(creator=user, metadata=[('title', 'foo title')])] - ) - data = { - 'metadata': { - 'title': 'foo title', - 'authors': [ - { - 'forename': 'Jane', - 'surname': 'Doe', - 'email': 'jane@doe.com' - } - ] - } - } - resp, stat, head = submission.update_submission(data, self.headers, - self.user_data, - self.client_data, - self.token, 1) - self.assertEqual(stat, status.HTTP_200_OK, - "Should return 200 OK when submission is" - " successfully updated.") - self.assertIn('Location', head, "Should include a Location header.") - call_args, call_kwargs = mock_events.save.call_args - - self.assertIsInstance(call_args[0], UpdateMetadata, - "Should pass an UpdateMetadata") - self.assertIsInstance(call_args[1], UpdateAuthors, - "Should pass an UpdateAuthors")
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_nonexistant_submission(self, mock_events, url_for): - """Trying to update a nonexistant submission throws exception.""" - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = NoSuchSubmission - url_for.return_value = '/foo/' - data = { - 'metadata': { - 'title': 'foo title' - } - } - with self.assertRaises(NotFound): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_invalid_data(self, mock_events, url_for): - """Trying to update a submission with invalid data throws exception.""" - preserve_exceptions_and_events(mock_events) - url_for.return_value = '/foo/' - data = { - 'metadata': 'bad value', - } - with self.assertRaises(BadRequest): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_db_down(self, mock_events, url_for): - """An internal server error is raised when the database is down.""" - url_for.return_value = '/foo/' - mock_events.save.side_effect = SaveError - preserve_exceptions_and_events(mock_events) - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- -
[docs] @mock.patch('metadata.controllers.submission.url_for') - @mock.patch('metadata.controllers.submission.ev') - def test_update_submission_with_invalid_event(self, mock_events, url_for): - """An internal server error is raised on an invalid event.""" - url_for.return_value = '/foo/' - preserve_exceptions_and_events(mock_events) - mock_events.save.side_effect = InvalidEvent - data = { - 'primary_classification': { - 'category': 'astro-ph' - } - } - with self.assertRaises(InternalServerError): - submission.update_submission(data, self.headers, self.user_data, - self.client_data, self.token, 1)
- - -
[docs]class TestGetSubmission(TestCase): - """Tests for :func:`.submission.get_submission`.""" - -
[docs] @mock.patch('metadata.controllers.submission.ev') - def test_get_submission(self, mock_events): - """Should return a JSON-serializable dict if submisison exists.""" - preserve_exceptions_and_events(mock_events) - user = User(1234, 'foo@bar.baz') - mock_events.load.return_value = ( - Submission(creator=user, owner=user, created=datetime.now()), - [CreateSubmission(creator=user)] - ) - content, status_code, headers = submission.get_submission(1) - self.assertEqual(mock_events.load.call_count, 1, - "Should call load() in the events core package") - self.assertEqual(status_code, status.HTTP_200_OK, - "Should return 200 OK") - self.assertIsInstance(content, dict, "Should return a dict") - try: - json.dumps(content) - except Exception: - self.fail("Content should be JSON-serializable.")
- -
[docs] @mock.patch('metadata.controllers.submission.ev') - def test_get_nonexistant_submission(self, mock_events): - """Should raise NotFound if the submission does not exist.""" - preserve_exceptions_and_events(mock_events) - mock_events.load.side_effect = NoSuchSubmission - with self.assertRaises(NotFound): - submission.get_submission(1)
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/controllers/util.html b/docs/_modules/metadata/controllers/util.html deleted file mode 100644 index e595a22..0000000 --- a/docs/_modules/metadata/controllers/util.html +++ /dev/null @@ -1,151 +0,0 @@ - - - - - - - metadata.controllers.util — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.controllers.util

-"""Utilities/helpers for controllers."""
-
-import os
-from typing import Callable, Tuple, Any
-from functools import wraps
-from werkzeug.exceptions import BadRequest
-from arxiv.util import schema
-
-
-Response = Tuple[dict, int, dict]
-
-
-
[docs]def validate_request(schema_path: str) -> Callable: - """ - Generate a controller decorator that validates the request body. - - Parameters - ---------- - schema_path : str - Path (absolute, or relative to the execution path) to the JSON Schema - document. - - Returns - ------- - decorator - Decorates a controller function with request body validation against - the specified JSON Schema. - - """ - schema_path = os.path.join( - os.path.abspath(os.path.dirname(__file__)), - '../..', - schema_path - ) - validate = schema.load(schema_path) - - def _decorator(func: Callable) -> Callable: - @wraps(func) - def _wrpr(data: dict, *args: Any, **kwargs: Any) -> Response: - try: - validate(data) - except schema.ValidationError as e: - # A summary of the exception is on the first line of the repr. - raise BadRequest(str(e).split('\n')[0]) - response: Tuple[dict, int, dict] = func(data, *args, **kwargs) - return response - return _wrpr - return _decorator
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/exceptions.html b/docs/_modules/metadata/exceptions.html deleted file mode 100644 index e836904..0000000 --- a/docs/_modules/metadata/exceptions.html +++ /dev/null @@ -1,105 +0,0 @@ - - - - - - - metadata.exceptions — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.exceptions

-
[docs]class ValidationError(ValueError): - pass
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/factory.html b/docs/_modules/metadata/factory.html deleted file mode 100644 index 9b14524..0000000 --- a/docs/_modules/metadata/factory.html +++ /dev/null @@ -1,140 +0,0 @@ - - - - - - - metadata.factory — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.factory

-"""Application factory for references service components."""
-
-from arxiv.base import logging
-
-from flask import Flask, jsonify, make_response
-from werkzeug.exceptions import Forbidden, Unauthorized, NotFound, BadRequest
-
-from metadata import routes
-from arxiv.base.middleware import wrap
-
-from events.services import classic
-
-from authorization import middleware as auth
-
-
-
[docs]def jsonify_exception(error): - """Render the base 404 error page.""" - exc_resp = error.get_response() - response = jsonify(reason=error.description) - response.status_code = exc_resp.status_code - return response
- - -
[docs]def create_web_app() -> Flask: - """Initialize an instance of the extractor backend service.""" - app = Flask('metadata') - classic.init_app(app) - app.config.from_pyfile('config.py') - - app.register_blueprint(routes.blueprint) - app.errorhandler(Forbidden)(jsonify_exception) - app.errorhandler(NotFound)(jsonify_exception) - app.errorhandler(BadRequest)(jsonify_exception) - app.errorhandler(Unauthorized)(jsonify_exception) - - wrap(app, [auth.AuthMiddleware]) - return app
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/metadata/routes.html b/docs/_modules/metadata/routes.html deleted file mode 100644 index 7feca10..0000000 --- a/docs/_modules/metadata/routes.html +++ /dev/null @@ -1,188 +0,0 @@ - - - - - - - metadata.routes — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for metadata.routes

-"""Provides External REST API."""
-
-from arxiv.base import logging
-from typing import Callable, Union
-from functools import wraps
-from flask.json import jsonify
-from flask import Blueprint, current_app, redirect, request, g, Response
-
-from authorization.decorators import scoped
-from arxiv import status
-from metadata.controllers import submission
-
-logger = logging.getLogger(__name__)
-
-blueprint = Blueprint('submission', __name__, url_prefix='')
-
-
-
[docs]def json_response(func): - """Generate a wrapper for routes that JSONifies the response body.""" - @wraps(func) - def wrapper(*args, **kwargs): - r_body, r_status, r_headers = func(*args, **kwargs) - response = jsonify(r_body) - response.status_code = r_status - response.headers.extend(r_headers) - return response - return wrapper
- - -
[docs]@blueprint.route('/', methods=['POST']) -@json_response -@scoped('submission:write') -def create_submission() -> Union[str, Response]: - """Accept new submissions.""" - return submission.create_submission( - request.get_json(), - dict(request.headers), - user_data=g.user, - client_data=g.client, - token=g.token - )
- - -
[docs]@blueprint.route('/<string:submission_id>', methods=['GET']) -@json_response -@scoped('submission:read') -def get_submission(submission_id: str) -> tuple: - """Get the current state of a submission.""" - return submission.get_submission( - submission_id, - user=g.user, - client=g.client, - token=g.token - )
- -# -# @blueprint.route('/<string:submission_id>/history/', methods=['GET']) -# @authorization.scoped(authorization.READ) -# @json_response -# def get_submission_history(submission_id: str) -> tuple: -# """Get the event log for a submission.""" -# return submission.get_submission_log( -# request.get_json(), -# dict(request.headers), -# submission_id=submission_id, -# user=g.user, -# client=g.client, -# scope=g.scope, -# token=g.token -# ) - - -
[docs]@blueprint.route('/<string:submission_id>/', methods=['POST']) -@json_response -@scoped('submission:write') -def update_submission(submission_id: str) -> tuple: - """Update the submission.""" - return submission.update_submission( - submission_id, - request.get_json(), - dict(request.headers), - user=g.user, - client=g.client, - token=g.token - )
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/sqlalchemy/orm/attributes.html b/docs/_modules/sqlalchemy/orm/attributes.html deleted file mode 100644 index affc230..0000000 --- a/docs/_modules/sqlalchemy/orm/attributes.html +++ /dev/null @@ -1,1720 +0,0 @@ - - - - - - - sqlalchemy.orm.attributes — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for sqlalchemy.orm.attributes

-# orm/attributes.py
-# Copyright (C) 2005-2017 the SQLAlchemy authors and contributors
-# <see AUTHORS file>
-#
-# This module is part of SQLAlchemy and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-"""Defines instrumentation for class attributes and their interaction
-with instances.
-
-This module is usually not directly visible to user applications, but
-defines a large part of the ORM's interactivity.
-
-
-"""
-
-import operator
-from .. import util, event, inspection
-from . import interfaces, collections, exc as orm_exc
-
-from .base import instance_state, instance_dict, manager_of_class
-
-from .base import PASSIVE_NO_RESULT, ATTR_WAS_SET, ATTR_EMPTY, NO_VALUE,\
-    NEVER_SET, NO_CHANGE, CALLABLES_OK, SQL_OK, RELATED_OBJECT_OK,\
-    INIT_OK, NON_PERSISTENT_OK, LOAD_AGAINST_COMMITTED, PASSIVE_OFF,\
-    PASSIVE_RETURN_NEVER_SET, PASSIVE_NO_INITIALIZE, PASSIVE_NO_FETCH,\
-    PASSIVE_NO_FETCH_RELATED, PASSIVE_ONLY_PERSISTENT, NO_AUTOFLUSH
-from .base import state_str, instance_str
-
-
-@inspection._self_inspects
-class QueryableAttribute(interfaces._MappedAttribute,
-                         interfaces.InspectionAttr,
-                         interfaces.PropComparator):
-    """Base class for :term:`descriptor` objects that intercept
-    attribute events on behalf of a :class:`.MapperProperty`
-    object.  The actual :class:`.MapperProperty` is accessible
-    via the :attr:`.QueryableAttribute.property`
-    attribute.
-
-
-    .. seealso::
-
-        :class:`.InstrumentedAttribute`
-
-        :class:`.MapperProperty`
-
-        :attr:`.Mapper.all_orm_descriptors`
-
-        :attr:`.Mapper.attrs`
-    """
-
-    is_attribute = True
-
-    def __init__(self, class_, key, impl=None,
-                 comparator=None, parententity=None,
-                 of_type=None):
-        self.class_ = class_
-        self.key = key
-        self.impl = impl
-        self.comparator = comparator
-        self._parententity = parententity
-        self._of_type = of_type
-
-        manager = manager_of_class(class_)
-        # manager is None in the case of AliasedClass
-        if manager:
-            # propagate existing event listeners from
-            # immediate superclass
-            for base in manager._bases:
-                if key in base:
-                    self.dispatch._update(base[key].dispatch)
-
-    @util.memoized_property
-    def _supports_population(self):
-        return self.impl.supports_population
-
-    def get_history(self, instance, passive=PASSIVE_OFF):
-        return self.impl.get_history(instance_state(instance),
-                                     instance_dict(instance), passive)
-
-    def __selectable__(self):
-        # TODO: conditionally attach this method based on clause_element ?
-        return self
-
-    @util.memoized_property
-    def info(self):
-        """Return the 'info' dictionary for the underlying SQL element.
-
-        The behavior here is as follows:
-
-        * If the attribute is a column-mapped property, i.e.
-          :class:`.ColumnProperty`, which is mapped directly
-          to a schema-level :class:`.Column` object, this attribute
-          will return the :attr:`.SchemaItem.info` dictionary associated
-          with the core-level :class:`.Column` object.
-
-        * If the attribute is a :class:`.ColumnProperty` but is mapped to
-          any other kind of SQL expression other than a :class:`.Column`,
-          the attribute will refer to the :attr:`.MapperProperty.info`
-          dictionary associated directly with the :class:`.ColumnProperty`,
-          assuming the SQL expression itself does not have its own ``.info``
-          attribute (which should be the case, unless a user-defined SQL
-          construct has defined one).
-
-        * If the attribute refers to any other kind of
-          :class:`.MapperProperty`, including :class:`.RelationshipProperty`,
-          the attribute will refer to the :attr:`.MapperProperty.info`
-          dictionary associated with that :class:`.MapperProperty`.
-
-        * To access the :attr:`.MapperProperty.info` dictionary of the
-          :class:`.MapperProperty` unconditionally, including for a
-          :class:`.ColumnProperty` that's associated directly with a
-          :class:`.schema.Column`, the attribute can be referred to using
-          :attr:`.QueryableAttribute.property` attribute, as
-          ``MyClass.someattribute.property.info``.
-
-        .. versionadded:: 0.8.0
-
-        .. seealso::
-
-            :attr:`.SchemaItem.info`
-
-            :attr:`.MapperProperty.info`
-
-        """
-        return self.comparator.info
-
-    @util.memoized_property
-    def parent(self):
-        """Return an inspection instance representing the parent.
-
-        This will be either an instance of :class:`.Mapper`
-        or :class:`.AliasedInsp`, depending upon the nature
-        of the parent entity which this attribute is associated
-        with.
-
-        """
-        return inspection.inspect(self._parententity)
-
-    @property
-    def expression(self):
-        return self.comparator.__clause_element__()
-
-    def __clause_element__(self):
-        return self.comparator.__clause_element__()
-
-    def _query_clause_element(self):
-        """like __clause_element__(), but called specifically
-        by :class:`.Query` to allow special behavior."""
-
-        return self.comparator._query_clause_element()
-
-    def adapt_to_entity(self, adapt_to_entity):
-        assert not self._of_type
-        return self.__class__(adapt_to_entity.entity,
-                              self.key, impl=self.impl,
-                              comparator=self.comparator.adapt_to_entity(
-                                  adapt_to_entity),
-                              parententity=adapt_to_entity)
-
-    def of_type(self, cls):
-        return QueryableAttribute(
-            self.class_,
-            self.key,
-            self.impl,
-            self.comparator.of_type(cls),
-            self._parententity,
-            of_type=cls)
-
-    def label(self, name):
-        return self._query_clause_element().label(name)
-
-    def operate(self, op, *other, **kwargs):
-        return op(self.comparator, *other, **kwargs)
-
-    def reverse_operate(self, op, other, **kwargs):
-        return op(other, self.comparator, **kwargs)
-
-    def hasparent(self, state, optimistic=False):
-        return self.impl.hasparent(state, optimistic=optimistic) is not False
-
-    def __getattr__(self, key):
-        try:
-            return getattr(self.comparator, key)
-        except AttributeError:
-            raise AttributeError(
-                'Neither %r object nor %r object associated with %s '
-                'has an attribute %r' % (
-                    type(self).__name__,
-                    type(self.comparator).__name__,
-                    self,
-                    key)
-            )
-
-    def __str__(self):
-        return "%s.%s" % (self.class_.__name__, self.key)
-
-    @util.memoized_property
-    def property(self):
-        """Return the :class:`.MapperProperty` associated with this
-        :class:`.QueryableAttribute`.
-
-
-        Return values here will commonly be instances of
-        :class:`.ColumnProperty` or :class:`.RelationshipProperty`.
-
-
-        """
-        return self.comparator.property
-
-
-class InstrumentedAttribute(QueryableAttribute):
-    """Class bound instrumented attribute which adds basic
-    :term:`descriptor` methods.
-
-    See :class:`.QueryableAttribute` for a description of most features.
-
-
-    """
-
-    def __set__(self, instance, value):
-        self.impl.set(instance_state(instance),
-                      instance_dict(instance), value, None)
-
-    def __delete__(self, instance):
-        self.impl.delete(instance_state(instance), instance_dict(instance))
-
-    def __get__(self, instance, owner):
-        if instance is None:
-            return self
-
-        dict_ = instance_dict(instance)
-        if self._supports_population and self.key in dict_:
-            return dict_[self.key]
-        else:
-            return self.impl.get(instance_state(instance), dict_)
-
-
-def create_proxied_attribute(descriptor):
-    """Create an QueryableAttribute / user descriptor hybrid.
-
-    Returns a new QueryableAttribute type that delegates descriptor
-    behavior and getattr() to the given descriptor.
-    """
-
-    # TODO: can move this to descriptor_props if the need for this
-    # function is removed from ext/hybrid.py
-
-    class Proxy(QueryableAttribute):
-        """Presents the :class:`.QueryableAttribute` interface as a
-        proxy on top of a Python descriptor / :class:`.PropComparator`
-        combination.
-
-        """
-
-        def __init__(self, class_, key, descriptor,
-                     comparator,
-                     adapt_to_entity=None, doc=None,
-                     original_property=None):
-            self.class_ = class_
-            self.key = key
-            self.descriptor = descriptor
-            self.original_property = original_property
-            self._comparator = comparator
-            self._adapt_to_entity = adapt_to_entity
-            self.__doc__ = doc
-
-        @property
-        def property(self):
-            return self.comparator.property
-
-        @util.memoized_property
-        def comparator(self):
-            if util.callable(self._comparator):
-                self._comparator = self._comparator()
-            if self._adapt_to_entity:
-                self._comparator = self._comparator.adapt_to_entity(
-                    self._adapt_to_entity)
-            return self._comparator
-
-        def adapt_to_entity(self, adapt_to_entity):
-            return self.__class__(adapt_to_entity.entity,
-                                  self.key,
-                                  self.descriptor,
-                                  self._comparator,
-                                  adapt_to_entity)
-
-        def __get__(self, instance, owner):
-            if instance is None:
-                return self
-            else:
-                return self.descriptor.__get__(instance, owner)
-
-        def __str__(self):
-            return "%s.%s" % (self.class_.__name__, self.key)
-
-        def __getattr__(self, attribute):
-            """Delegate __getattr__ to the original descriptor and/or
-            comparator."""
-
-            try:
-                return getattr(descriptor, attribute)
-            except AttributeError:
-                try:
-                    return getattr(self.comparator, attribute)
-                except AttributeError:
-                    raise AttributeError(
-                        'Neither %r object nor %r object associated with %s '
-                        'has an attribute %r' % (
-                            type(descriptor).__name__,
-                            type(self.comparator).__name__,
-                            self,
-                            attribute)
-                    )
-
-    Proxy.__name__ = type(descriptor).__name__ + 'Proxy'
-
-    util.monkeypatch_proxied_specials(Proxy, type(descriptor),
-                                      name='descriptor',
-                                      from_instance=descriptor)
-    return Proxy
-
-OP_REMOVE = util.symbol("REMOVE")
-OP_APPEND = util.symbol("APPEND")
-OP_REPLACE = util.symbol("REPLACE")
-
-
-class Event(object):
-    """A token propagated throughout the course of a chain of attribute
-    events.
-
-    Serves as an indicator of the source of the event and also provides
-    a means of controlling propagation across a chain of attribute
-    operations.
-
-    The :class:`.Event` object is sent as the ``initiator`` argument
-    when dealing with the :meth:`.AttributeEvents.append`,
-    :meth:`.AttributeEvents.set`,
-    and :meth:`.AttributeEvents.remove` events.
-
-    The :class:`.Event` object is currently interpreted by the backref
-    event handlers, and is used to control the propagation of operations
-    across two mutually-dependent attributes.
-
-    .. versionadded:: 0.9.0
-
-    :var impl: The :class:`.AttributeImpl` which is the current event
-     initiator.
-
-    :var op: The symbol :attr:`.OP_APPEND`, :attr:`.OP_REMOVE` or
-     :attr:`.OP_REPLACE`, indicating the source operation.
-
-    """
-
-    __slots__ = 'impl', 'op', 'parent_token'
-
-    def __init__(self, attribute_impl, op):
-        self.impl = attribute_impl
-        self.op = op
-        self.parent_token = self.impl.parent_token
-
-    def __eq__(self, other):
-        return isinstance(other, Event) and \
-            other.impl is self.impl and \
-            other.op == self.op
-
-    @property
-    def key(self):
-        return self.impl.key
-
-    def hasparent(self, state):
-        return self.impl.hasparent(state)
-
-
-class AttributeImpl(object):
-    """internal implementation for instrumented attributes."""
-
-    def __init__(self, class_, key,
-                 callable_, dispatch, trackparent=False, extension=None,
-                 compare_function=None, active_history=False,
-                 parent_token=None, expire_missing=True,
-                 send_modified_events=True,
-                 **kwargs):
-        r"""Construct an AttributeImpl.
-
-        \class_
-          associated class
-
-        key
-          string name of the attribute
-
-        \callable_
-          optional function which generates a callable based on a parent
-          instance, which produces the "default" values for a scalar or
-          collection attribute when it's first accessed, if not present
-          already.
-
-        trackparent
-          if True, attempt to track if an instance has a parent attached
-          to it via this attribute.
-
-        extension
-          a single or list of AttributeExtension object(s) which will
-          receive set/delete/append/remove/etc. events.  Deprecated.
-          The event package is now used.
-
-        compare_function
-          a function that compares two values which are normally
-          assignable to this attribute.
-
-        active_history
-          indicates that get_history() should always return the "old" value,
-          even if it means executing a lazy callable upon attribute change.
-
-        parent_token
-          Usually references the MapperProperty, used as a key for
-          the hasparent() function to identify an "owning" attribute.
-          Allows multiple AttributeImpls to all match a single
-          owner attribute.
-
-        expire_missing
-          if False, don't add an "expiry" callable to this attribute
-          during state.expire_attributes(None), if no value is present
-          for this key.
-
-        send_modified_events
-          if False, the InstanceState._modified_event method will have no
-          effect; this means the attribute will never show up as changed in a
-          history entry.
-        """
-        self.class_ = class_
-        self.key = key
-        self.callable_ = callable_
-        self.dispatch = dispatch
-        self.trackparent = trackparent
-        self.parent_token = parent_token or self
-        self.send_modified_events = send_modified_events
-        if compare_function is None:
-            self.is_equal = operator.eq
-        else:
-            self.is_equal = compare_function
-
-        # TODO: pass in the manager here
-        # instead of doing a lookup
-        attr = manager_of_class(class_)[key]
-
-        for ext in util.to_list(extension or []):
-            ext._adapt_listener(attr, ext)
-
-        if active_history:
-            self.dispatch._active_history = True
-
-        self.expire_missing = expire_missing
-
-    __slots__ = (
-        'class_', 'key', 'callable_', 'dispatch', 'trackparent',
-        'parent_token', 'send_modified_events', 'is_equal', 'expire_missing'
-    )
-
-    def __str__(self):
-        return "%s.%s" % (self.class_.__name__, self.key)
-
-    def _get_active_history(self):
-        """Backwards compat for impl.active_history"""
-
-        return self.dispatch._active_history
-
-    def _set_active_history(self, value):
-        self.dispatch._active_history = value
-
-    active_history = property(_get_active_history, _set_active_history)
-
-    def hasparent(self, state, optimistic=False):
-        """Return the boolean value of a `hasparent` flag attached to
-        the given state.
-
-        The `optimistic` flag determines what the default return value
-        should be if no `hasparent` flag can be located.
-
-        As this function is used to determine if an instance is an
-        *orphan*, instances that were loaded from storage should be
-        assumed to not be orphans, until a True/False value for this
-        flag is set.
-
-        An instance attribute that is loaded by a callable function
-        will also not have a `hasparent` flag.
-
-        """
-        msg = "This AttributeImpl is not configured to track parents."
-        assert self.trackparent, msg
-
-        return state.parents.get(id(self.parent_token), optimistic) \
-            is not False
-
-    def sethasparent(self, state, parent_state, value):
-        """Set a boolean flag on the given item corresponding to
-        whether or not it is attached to a parent object via the
-        attribute represented by this ``InstrumentedAttribute``.
-
-        """
-        msg = "This AttributeImpl is not configured to track parents."
-        assert self.trackparent, msg
-
-        id_ = id(self.parent_token)
-        if value:
-            state.parents[id_] = parent_state
-        else:
-            if id_ in state.parents:
-                last_parent = state.parents[id_]
-
-                if last_parent is not False and \
-                        last_parent.key != parent_state.key:
-
-                    if last_parent.obj() is None:
-                        raise orm_exc.StaleDataError(
-                            "Removing state %s from parent "
-                            "state %s along attribute '%s', "
-                            "but the parent record "
-                            "has gone stale, can't be sure this "
-                            "is the most recent parent." %
-                            (state_str(state),
-                             state_str(parent_state),
-                             self.key))
-
-                    return
-
-            state.parents[id_] = False
-
-    def get_history(self, state, dict_, passive=PASSIVE_OFF):
-        raise NotImplementedError()
-
-    def get_all_pending(self, state, dict_, passive=PASSIVE_NO_INITIALIZE):
-        """Return a list of tuples of (state, obj)
-        for all objects in this attribute's current state
-        + history.
-
-        Only applies to object-based attributes.
-
-        This is an inlining of existing functionality
-        which roughly corresponds to:
-
-            get_state_history(
-                        state,
-                        key,
-                        passive=PASSIVE_NO_INITIALIZE).sum()
-
-        """
-        raise NotImplementedError()
-
-    def initialize(self, state, dict_):
-        """Initialize the given state's attribute with an empty value."""
-
-        value = None
-        for fn in self.dispatch.init_scalar:
-            ret = fn(state, value, dict_)
-            if ret is not ATTR_EMPTY:
-                value = ret
-
-        return value
-
-    def get(self, state, dict_, passive=PASSIVE_OFF):
-        """Retrieve a value from the given object.
-        If a callable is assembled on this object's attribute, and
-        passive is False, the callable will be executed and the
-        resulting value will be set as the new value for this attribute.
-        """
-        if self.key in dict_:
-            return dict_[self.key]
-        else:
-            # if history present, don't load
-            key = self.key
-            if key not in state.committed_state or \
-                    state.committed_state[key] is NEVER_SET:
-                if not passive & CALLABLES_OK:
-                    return PASSIVE_NO_RESULT
-
-                if key in state.expired_attributes:
-                    value = state._load_expired(state, passive)
-                elif key in state.callables:
-                    callable_ = state.callables[key]
-                    value = callable_(state, passive)
-                elif self.callable_:
-                    value = self.callable_(state, passive)
-                else:
-                    value = ATTR_EMPTY
-
-                if value is PASSIVE_NO_RESULT or value is NEVER_SET:
-                    return value
-                elif value is ATTR_WAS_SET:
-                    try:
-                        return dict_[key]
-                    except KeyError:
-                        # TODO: no test coverage here.
-                        raise KeyError(
-                            "Deferred loader for attribute "
-                            "%r failed to populate "
-                            "correctly" % key)
-                elif value is not ATTR_EMPTY:
-                    return self.set_committed_value(state, dict_, value)
-
-            if not passive & INIT_OK:
-                return NEVER_SET
-            else:
-                # Return a new, empty value
-                return self.initialize(state, dict_)
-
-    def append(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        self.set(state, dict_, value, initiator, passive=passive)
-
-    def remove(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        self.set(state, dict_, None, initiator,
-                 passive=passive, check_old=value)
-
-    def pop(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        self.set(state, dict_, None, initiator,
-                 passive=passive, check_old=value, pop=True)
-
-    def set(self, state, dict_, value, initiator,
-            passive=PASSIVE_OFF, check_old=None, pop=False):
-        raise NotImplementedError()
-
-    def get_committed_value(self, state, dict_, passive=PASSIVE_OFF):
-        """return the unchanged value of this attribute"""
-
-        if self.key in state.committed_state:
-            value = state.committed_state[self.key]
-            if value in (NO_VALUE, NEVER_SET):
-                return None
-            else:
-                return value
-        else:
-            return self.get(state, dict_, passive=passive)
-
-    def set_committed_value(self, state, dict_, value):
-        """set an attribute value on the given instance and 'commit' it."""
-
-        dict_[self.key] = value
-        state._commit(dict_, [self.key])
-        return value
-
-
-class ScalarAttributeImpl(AttributeImpl):
-    """represents a scalar value-holding InstrumentedAttribute."""
-
-    accepts_scalar_loader = True
-    uses_objects = False
-    supports_population = True
-    collection = False
-
-    __slots__ = '_replace_token', '_append_token', '_remove_token'
-
-    def __init__(self, *arg, **kw):
-        super(ScalarAttributeImpl, self).__init__(*arg, **kw)
-        self._replace_token = self._append_token = None
-        self._remove_token = None
-
-    def _init_append_token(self):
-        self._replace_token = self._append_token = Event(self, OP_REPLACE)
-        return self._replace_token
-
-    _init_append_or_replace_token = _init_append_token
-
-    def _init_remove_token(self):
-        self._remove_token = Event(self, OP_REMOVE)
-        return self._remove_token
-
-    def delete(self, state, dict_):
-
-        # TODO: catch key errors, convert to attributeerror?
-        if self.dispatch._active_history:
-            old = self.get(state, dict_, PASSIVE_RETURN_NEVER_SET)
-        else:
-            old = dict_.get(self.key, NO_VALUE)
-
-        if self.dispatch.remove:
-            self.fire_remove_event(state, dict_, old, self._remove_token)
-        state._modified_event(dict_, self, old)
-        del dict_[self.key]
-
-    def get_history(self, state, dict_, passive=PASSIVE_OFF):
-        if self.key in dict_:
-            return History.from_scalar_attribute(self, state, dict_[self.key])
-        else:
-            if passive & INIT_OK:
-                passive ^= INIT_OK
-            current = self.get(state, dict_, passive=passive)
-            if current is PASSIVE_NO_RESULT:
-                return HISTORY_BLANK
-            else:
-                return History.from_scalar_attribute(self, state, current)
-
-    def set(self, state, dict_, value, initiator,
-            passive=PASSIVE_OFF, check_old=None, pop=False):
-        if self.dispatch._active_history:
-            old = self.get(state, dict_, PASSIVE_RETURN_NEVER_SET)
-        else:
-            old = dict_.get(self.key, NO_VALUE)
-
-        if self.dispatch.set:
-            value = self.fire_replace_event(state, dict_,
-                                            value, old, initiator)
-        state._modified_event(dict_, self, old)
-        dict_[self.key] = value
-
-    def fire_replace_event(self, state, dict_, value, previous, initiator):
-        for fn in self.dispatch.set:
-            value = fn(
-                state, value, previous,
-                initiator or self._replace_token or
-                self._init_append_or_replace_token())
-        return value
-
-    def fire_remove_event(self, state, dict_, value, initiator):
-        for fn in self.dispatch.remove:
-            fn(state, value,
-               initiator or self._remove_token or self._init_remove_token())
-
-    @property
-    def type(self):
-        self.property.columns[0].type
-
-
-class ScalarObjectAttributeImpl(ScalarAttributeImpl):
-    """represents a scalar-holding InstrumentedAttribute,
-       where the target object is also instrumented.
-
-       Adds events to delete/set operations.
-
-    """
-
-    accepts_scalar_loader = False
-    uses_objects = True
-    supports_population = True
-    collection = False
-
-    __slots__ = ()
-
-    def delete(self, state, dict_):
-        old = self.get(state, dict_)
-        self.fire_remove_event(
-            state, dict_, old,
-            self._remove_token or self._init_remove_token())
-        del dict_[self.key]
-
-    def get_history(self, state, dict_, passive=PASSIVE_OFF):
-        if self.key in dict_:
-            return History.from_object_attribute(self, state, dict_[self.key])
-        else:
-            if passive & INIT_OK:
-                passive ^= INIT_OK
-            current = self.get(state, dict_, passive=passive)
-            if current is PASSIVE_NO_RESULT:
-                return HISTORY_BLANK
-            else:
-                return History.from_object_attribute(self, state, current)
-
-    def get_all_pending(self, state, dict_, passive=PASSIVE_NO_INITIALIZE):
-        if self.key in dict_:
-            current = dict_[self.key]
-        elif passive & CALLABLES_OK:
-            current = self.get(state, dict_, passive=passive)
-        else:
-            return []
-
-        # can't use __hash__(), can't use __eq__() here
-        if current is not None and \
-                current is not PASSIVE_NO_RESULT and \
-                current is not NEVER_SET:
-            ret = [(instance_state(current), current)]
-        else:
-            ret = [(None, None)]
-
-        if self.key in state.committed_state:
-            original = state.committed_state[self.key]
-            if original is not None and \
-                    original is not PASSIVE_NO_RESULT and \
-                    original is not NEVER_SET and \
-                    original is not current:
-
-                ret.append((instance_state(original), original))
-        return ret
-
-    def set(self, state, dict_, value, initiator,
-            passive=PASSIVE_OFF, check_old=None, pop=False):
-        """Set a value on the given InstanceState.
-
-        """
-        if self.dispatch._active_history:
-            old = self.get(
-                state, dict_,
-                passive=PASSIVE_ONLY_PERSISTENT |
-                NO_AUTOFLUSH | LOAD_AGAINST_COMMITTED)
-        else:
-            old = self.get(
-                state, dict_, passive=PASSIVE_NO_FETCH ^ INIT_OK |
-                LOAD_AGAINST_COMMITTED)
-
-        if check_old is not None and \
-                old is not PASSIVE_NO_RESULT and \
-                check_old is not old:
-            if pop:
-                return
-            else:
-                raise ValueError(
-                    "Object %s not associated with %s on attribute '%s'" % (
-                        instance_str(check_old),
-                        state_str(state),
-                        self.key
-                    ))
-
-        value = self.fire_replace_event(state, dict_, value, old, initiator)
-        dict_[self.key] = value
-
-    def fire_remove_event(self, state, dict_, value, initiator):
-        if self.trackparent and value is not None:
-            self.sethasparent(instance_state(value), state, False)
-
-        for fn in self.dispatch.remove:
-            fn(state, value, initiator or
-               self._remove_token or self._init_remove_token())
-
-        state._modified_event(dict_, self, value)
-
-    def fire_replace_event(self, state, dict_, value, previous, initiator):
-        if self.trackparent:
-            if (previous is not value and
-                    previous not in (None, PASSIVE_NO_RESULT, NEVER_SET)):
-                self.sethasparent(instance_state(previous), state, False)
-
-        for fn in self.dispatch.set:
-            value = fn(
-                state, value, previous, initiator or
-                self._replace_token or self._init_append_or_replace_token())
-
-        state._modified_event(dict_, self, previous)
-
-        if self.trackparent:
-            if value is not None:
-                self.sethasparent(instance_state(value), state, True)
-
-        return value
-
-
-class CollectionAttributeImpl(AttributeImpl):
-    """A collection-holding attribute that instruments changes in membership.
-
-    Only handles collections of instrumented objects.
-
-    InstrumentedCollectionAttribute holds an arbitrary, user-specified
-    container object (defaulting to a list) and brokers access to the
-    CollectionAdapter, a "view" onto that object that presents consistent bag
-    semantics to the orm layer independent of the user data implementation.
-
-    """
-    accepts_scalar_loader = False
-    uses_objects = True
-    supports_population = True
-    collection = True
-
-    __slots__ = (
-        'copy', 'collection_factory', '_append_token', '_remove_token',
-        '_duck_typed_as'
-    )
-
-    def __init__(self, class_, key, callable_, dispatch,
-                 typecallable=None, trackparent=False, extension=None,
-                 copy_function=None, compare_function=None, **kwargs):
-        super(CollectionAttributeImpl, self).__init__(
-            class_,
-            key,
-            callable_, dispatch,
-            trackparent=trackparent,
-            extension=extension,
-            compare_function=compare_function,
-            **kwargs)
-
-        if copy_function is None:
-            copy_function = self.__copy
-        self.copy = copy_function
-        self.collection_factory = typecallable
-        self._append_token = None
-        self._remove_token = None
-        self._duck_typed_as = util.duck_type_collection(
-            self.collection_factory())
-
-        if getattr(self.collection_factory, "_sa_linker", None):
-
-            @event.listens_for(self, "init_collection")
-            def link(target, collection, collection_adapter):
-                collection._sa_linker(collection_adapter)
-
-            @event.listens_for(self, "dispose_collection")
-            def unlink(target, collection, collection_adapter):
-                collection._sa_linker(None)
-
-    def _init_append_token(self):
-        self._append_token = Event(self, OP_APPEND)
-        return self._append_token
-
-    def _init_remove_token(self):
-        self._remove_token = Event(self, OP_REMOVE)
-        return self._remove_token
-
-    def __copy(self, item):
-        return [y for y in collections.collection_adapter(item)]
-
-    def get_history(self, state, dict_, passive=PASSIVE_OFF):
-        current = self.get(state, dict_, passive=passive)
-        if current is PASSIVE_NO_RESULT:
-            return HISTORY_BLANK
-        else:
-            return History.from_collection(self, state, current)
-
-    def get_all_pending(self, state, dict_, passive=PASSIVE_NO_INITIALIZE):
-        # NOTE: passive is ignored here at the moment
-
-        if self.key not in dict_:
-            return []
-
-        current = dict_[self.key]
-        current = getattr(current, '_sa_adapter')
-
-        if self.key in state.committed_state:
-            original = state.committed_state[self.key]
-            if original not in (NO_VALUE, NEVER_SET):
-                current_states = [((c is not None) and
-                                   instance_state(c) or None, c)
-                                  for c in current]
-                original_states = [((c is not None) and
-                                    instance_state(c) or None, c)
-                                   for c in original]
-
-                current_set = dict(current_states)
-                original_set = dict(original_states)
-
-                return \
-                    [(s, o) for s, o in current_states
-                        if s not in original_set] + \
-                    [(s, o) for s, o in current_states
-                        if s in original_set] + \
-                    [(s, o) for s, o in original_states
-                        if s not in current_set]
-
-        return [(instance_state(o), o) for o in current]
-
-    def fire_append_event(self, state, dict_, value, initiator):
-        for fn in self.dispatch.append:
-            value = fn(
-                state, value,
-                initiator or self._append_token or self._init_append_token())
-
-        state._modified_event(dict_, self, NEVER_SET, True)
-
-        if self.trackparent and value is not None:
-            self.sethasparent(instance_state(value), state, True)
-
-        return value
-
-    def fire_pre_remove_event(self, state, dict_, initiator):
-        state._modified_event(dict_, self, NEVER_SET, True)
-
-    def fire_remove_event(self, state, dict_, value, initiator):
-        if self.trackparent and value is not None:
-            self.sethasparent(instance_state(value), state, False)
-
-        for fn in self.dispatch.remove:
-            fn(state, value,
-               initiator or self._remove_token or self._init_remove_token())
-
-        state._modified_event(dict_, self, NEVER_SET, True)
-
-    def delete(self, state, dict_):
-        if self.key not in dict_:
-            return
-
-        state._modified_event(dict_, self, NEVER_SET, True)
-
-        collection = self.get_collection(state, state.dict)
-        collection.clear_with_event()
-        # TODO: catch key errors, convert to attributeerror?
-        del dict_[self.key]
-
-    def initialize(self, state, dict_):
-        """Initialize this attribute with an empty collection."""
-
-        _, user_data = self._initialize_collection(state)
-        dict_[self.key] = user_data
-        return user_data
-
-    def _initialize_collection(self, state):
-
-        adapter, collection = state.manager.initialize_collection(
-            self.key, state, self.collection_factory)
-
-        self.dispatch.init_collection(state, collection, adapter)
-
-        return adapter, collection
-
-    def append(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        collection = self.get_collection(state, dict_, passive=passive)
-        if collection is PASSIVE_NO_RESULT:
-            value = self.fire_append_event(state, dict_, value, initiator)
-            assert self.key not in dict_, \
-                "Collection was loaded during event handling."
-            state._get_pending_mutation(self.key).append(value)
-        else:
-            collection.append_with_event(value, initiator)
-
-    def remove(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        collection = self.get_collection(state, state.dict, passive=passive)
-        if collection is PASSIVE_NO_RESULT:
-            self.fire_remove_event(state, dict_, value, initiator)
-            assert self.key not in dict_, \
-                "Collection was loaded during event handling."
-            state._get_pending_mutation(self.key).remove(value)
-        else:
-            collection.remove_with_event(value, initiator)
-
-    def pop(self, state, dict_, value, initiator, passive=PASSIVE_OFF):
-        try:
-            # TODO: better solution here would be to add
-            # a "popper" role to collections.py to complement
-            # "remover".
-            self.remove(state, dict_, value, initiator, passive=passive)
-        except (ValueError, KeyError, IndexError):
-            pass
-
-    def set(self, state, dict_, value, initiator=None,
-            passive=PASSIVE_OFF, pop=False, _adapt=True):
-        iterable = orig_iterable = value
-
-        # pulling a new collection first so that an adaptation exception does
-        # not trigger a lazy load of the old collection.
-        new_collection, user_data = self._initialize_collection(state)
-        if _adapt:
-            if new_collection._converter is not None:
-                iterable = new_collection._converter(iterable)
-            else:
-                setting_type = util.duck_type_collection(iterable)
-                receiving_type = self._duck_typed_as
-
-                if setting_type is not receiving_type:
-                    given = iterable is None and 'None' or \
-                        iterable.__class__.__name__
-                    wanted = self._duck_typed_as.__name__
-                    raise TypeError(
-                        "Incompatible collection type: %s is not %s-like" % (
-                            given, wanted))
-
-                # If the object is an adapted collection, return the (iterable)
-                # adapter.
-                if hasattr(iterable, '_sa_iterator'):
-                    iterable = iterable._sa_iterator()
-                elif setting_type is dict:
-                    if util.py3k:
-                        iterable = iterable.values()
-                    else:
-                        iterable = getattr(
-                            iterable, 'itervalues', iterable.values)()
-                else:
-                    iterable = iter(iterable)
-        new_values = list(iterable)
-
-        old = self.get(state, dict_, passive=PASSIVE_ONLY_PERSISTENT)
-        if old is PASSIVE_NO_RESULT:
-            old = self.initialize(state, dict_)
-        elif old is orig_iterable:
-            # ignore re-assignment of the current collection, as happens
-            # implicitly with in-place operators (foo.collection |= other)
-            return
-
-        # place a copy of "old" in state.committed_state
-        state._modified_event(dict_, self, old, True)
-
-        old_collection = old._sa_adapter
-
-        dict_[self.key] = user_data
-
-        collections.bulk_replace(
-            new_values, old_collection, new_collection)
-
-        del old._sa_adapter
-        self.dispatch.dispose_collection(state, old, old_collection)
-
-    def _invalidate_collection(self, collection):
-        adapter = getattr(collection, '_sa_adapter')
-        adapter.invalidated = True
-
-    def set_committed_value(self, state, dict_, value):
-        """Set an attribute value on the given instance and 'commit' it."""
-
-        collection, user_data = self._initialize_collection(state)
-
-        if value:
-            collection.append_multiple_without_event(value)
-
-        state.dict[self.key] = user_data
-
-        state._commit(dict_, [self.key])
-
-        if self.key in state._pending_mutations:
-            # pending items exist.  issue a modified event,
-            # add/remove new items.
-            state._modified_event(dict_, self, user_data, True)
-
-            pending = state._pending_mutations.pop(self.key)
-            added = pending.added_items
-            removed = pending.deleted_items
-            for item in added:
-                collection.append_without_event(item)
-            for item in removed:
-                collection.remove_without_event(item)
-
-        return user_data
-
-    def get_collection(self, state, dict_,
-                       user_data=None, passive=PASSIVE_OFF):
-        """Retrieve the CollectionAdapter associated with the given state.
-
-        Creates a new CollectionAdapter if one does not exist.
-
-        """
-        if user_data is None:
-            user_data = self.get(state, dict_, passive=passive)
-            if user_data is PASSIVE_NO_RESULT:
-                return user_data
-
-        return getattr(user_data, '_sa_adapter')
-
-
-def backref_listeners(attribute, key, uselist):
-    """Apply listeners to synchronize a two-way relationship."""
-
-    # use easily recognizable names for stack traces
-
-    parent_token = attribute.impl.parent_token
-    parent_impl = attribute.impl
-
-    def _acceptable_key_err(child_state, initiator, child_impl):
-        raise ValueError(
-            "Bidirectional attribute conflict detected: "
-            'Passing object %s to attribute "%s" '
-            'triggers a modify event on attribute "%s" '
-            'via the backref "%s".' % (
-                state_str(child_state),
-                initiator.parent_token,
-                child_impl.parent_token,
-                attribute.impl.parent_token
-            )
-        )
-
-    def emit_backref_from_scalar_set_event(state, child, oldchild, initiator):
-        if oldchild is child:
-            return child
-        if oldchild is not None and \
-                oldchild is not PASSIVE_NO_RESULT and \
-                oldchild is not NEVER_SET:
-            # With lazy=None, there's no guarantee that the full collection is
-            # present when updating via a backref.
-            old_state, old_dict = instance_state(oldchild),\
-                instance_dict(oldchild)
-            impl = old_state.manager[key].impl
-
-            if initiator.impl is not impl or \
-                    initiator.op not in (OP_REPLACE, OP_REMOVE):
-                impl.pop(old_state,
-                         old_dict,
-                         state.obj(),
-                         parent_impl._append_token or
-                            parent_impl._init_append_token(),
-                         passive=PASSIVE_NO_FETCH)
-
-        if child is not None:
-            child_state, child_dict = instance_state(child),\
-                instance_dict(child)
-            child_impl = child_state.manager[key].impl
-            if initiator.parent_token is not parent_token and \
-                    initiator.parent_token is not child_impl.parent_token:
-                _acceptable_key_err(state, initiator, child_impl)
-            elif initiator.impl is not child_impl or \
-                    initiator.op not in (OP_APPEND, OP_REPLACE):
-                child_impl.append(
-                    child_state,
-                    child_dict,
-                    state.obj(),
-                    initiator,
-                    passive=PASSIVE_NO_FETCH)
-        return child
-
-    def emit_backref_from_collection_append_event(state, child, initiator):
-        if child is None:
-            return
-
-        child_state, child_dict = instance_state(child), \
-            instance_dict(child)
-        child_impl = child_state.manager[key].impl
-
-        if initiator.parent_token is not parent_token and \
-                initiator.parent_token is not child_impl.parent_token:
-            _acceptable_key_err(state, initiator, child_impl)
-        elif initiator.impl is not child_impl or \
-                initiator.op not in (OP_APPEND, OP_REPLACE):
-            child_impl.append(
-                child_state,
-                child_dict,
-                state.obj(),
-                initiator,
-                passive=PASSIVE_NO_FETCH)
-        return child
-
-    def emit_backref_from_collection_remove_event(state, child, initiator):
-        if child is not None:
-            child_state, child_dict = instance_state(child),\
-                instance_dict(child)
-            child_impl = child_state.manager[key].impl
-            if initiator.impl is not child_impl or \
-                    initiator.op not in (OP_REMOVE, OP_REPLACE):
-                child_impl.pop(
-                    child_state,
-                    child_dict,
-                    state.obj(),
-                    initiator,
-                    passive=PASSIVE_NO_FETCH)
-
-    if uselist:
-        event.listen(attribute, "append",
-                     emit_backref_from_collection_append_event,
-                     retval=True, raw=True)
-    else:
-        event.listen(attribute, "set",
-                     emit_backref_from_scalar_set_event,
-                     retval=True, raw=True)
-    # TODO: need coverage in test/orm/ of remove event
-    event.listen(attribute, "remove",
-                 emit_backref_from_collection_remove_event,
-                 retval=True, raw=True)
-
-_NO_HISTORY = util.symbol('NO_HISTORY')
-_NO_STATE_SYMBOLS = frozenset([
-    id(PASSIVE_NO_RESULT),
-    id(NO_VALUE),
-    id(NEVER_SET)])
-
-History = util.namedtuple("History", [
-    "added", "unchanged", "deleted"
-])
-
-
-class History(History):
-    """A 3-tuple of added, unchanged and deleted values,
-    representing the changes which have occurred on an instrumented
-    attribute.
-
-    The easiest way to get a :class:`.History` object for a particular
-    attribute on an object is to use the :func:`.inspect` function::
-
-        from sqlalchemy import inspect
-
-        hist = inspect(myobject).attrs.myattribute.history
-
-    Each tuple member is an iterable sequence:
-
-    * ``added`` - the collection of items added to the attribute (the first
-      tuple element).
-
-    * ``unchanged`` - the collection of items that have not changed on the
-      attribute (the second tuple element).
-
-    * ``deleted`` - the collection of items that have been removed from the
-      attribute (the third tuple element).
-
-    """
-
-    def __bool__(self):
-        return self != HISTORY_BLANK
-    __nonzero__ = __bool__
-
-    def empty(self):
-        """Return True if this :class:`.History` has no changes
-        and no existing, unchanged state.
-
-        """
-
-        return not bool(
-            (self.added or self.deleted)
-            or self.unchanged
-        )
-
-    def sum(self):
-        """Return a collection of added + unchanged + deleted."""
-
-        return (self.added or []) +\
-            (self.unchanged or []) +\
-            (self.deleted or [])
-
-    def non_deleted(self):
-        """Return a collection of added + unchanged."""
-
-        return (self.added or []) +\
-            (self.unchanged or [])
-
-    def non_added(self):
-        """Return a collection of unchanged + deleted."""
-
-        return (self.unchanged or []) +\
-            (self.deleted or [])
-
-    def has_changes(self):
-        """Return True if this :class:`.History` has changes."""
-
-        return bool(self.added or self.deleted)
-
-    def as_state(self):
-        return History(
-            [(c is not None)
-             and instance_state(c) or None
-             for c in self.added],
-            [(c is not None)
-             and instance_state(c) or None
-             for c in self.unchanged],
-            [(c is not None)
-             and instance_state(c) or None
-             for c in self.deleted],
-        )
-
-    @classmethod
-    def from_scalar_attribute(cls, attribute, state, current):
-        original = state.committed_state.get(attribute.key, _NO_HISTORY)
-
-        if original is _NO_HISTORY:
-            if current is NEVER_SET:
-                return cls((), (), ())
-            else:
-                return cls((), [current], ())
-        # don't let ClauseElement expressions here trip things up
-        elif attribute.is_equal(current, original) is True:
-            return cls((), [current], ())
-        else:
-            # current convention on native scalars is to not
-            # include information
-            # about missing previous value in "deleted", but
-            # we do include None, which helps in some primary
-            # key situations
-            if id(original) in _NO_STATE_SYMBOLS:
-                deleted = ()
-            else:
-                deleted = [original]
-            if current is NEVER_SET:
-                return cls((), (), deleted)
-            else:
-                return cls([current], (), deleted)
-
-    @classmethod
-    def from_object_attribute(cls, attribute, state, current):
-        original = state.committed_state.get(attribute.key, _NO_HISTORY)
-
-        if original is _NO_HISTORY:
-            if current is NO_VALUE or current is NEVER_SET:
-                return cls((), (), ())
-            else:
-                return cls((), [current], ())
-        elif current is original:
-            return cls((), [current], ())
-        else:
-            # current convention on related objects is to not
-            # include information
-            # about missing previous value in "deleted", and
-            # to also not include None - the dependency.py rules
-            # ignore the None in any case.
-            if id(original) in _NO_STATE_SYMBOLS or original is None:
-                deleted = ()
-            else:
-                deleted = [original]
-            if current is NO_VALUE or current is NEVER_SET:
-                return cls((), (), deleted)
-            else:
-                return cls([current], (), deleted)
-
-    @classmethod
-    def from_collection(cls, attribute, state, current):
-        original = state.committed_state.get(attribute.key, _NO_HISTORY)
-
-        if current is NO_VALUE or current is NEVER_SET:
-            return cls((), (), ())
-
-        current = getattr(current, '_sa_adapter')
-        if original in (NO_VALUE, NEVER_SET):
-            return cls(list(current), (), ())
-        elif original is _NO_HISTORY:
-            return cls((), list(current), ())
-        else:
-
-            current_states = [((c is not None) and instance_state(c)
-                               or None, c)
-                              for c in current
-                              ]
-            original_states = [((c is not None) and instance_state(c)
-                                or None, c)
-                               for c in original
-                               ]
-
-            current_set = dict(current_states)
-            original_set = dict(original_states)
-
-            return cls(
-                [o for s, o in current_states if s not in original_set],
-                [o for s, o in current_states if s in original_set],
-                [o for s, o in original_states if s not in current_set]
-            )
-
-HISTORY_BLANK = History(None, None, None)
-
-
-def get_history(obj, key, passive=PASSIVE_OFF):
-    """Return a :class:`.History` record for the given object
-    and attribute key.
-
-    :param obj: an object whose class is instrumented by the
-      attributes package.
-
-    :param key: string attribute name.
-
-    :param passive: indicates loading behavior for the attribute
-       if the value is not already present.   This is a
-       bitflag attribute, which defaults to the symbol
-       :attr:`.PASSIVE_OFF` indicating all necessary SQL
-       should be emitted.
-
-    """
-    if passive is True:
-        util.warn_deprecated("Passing True for 'passive' is deprecated. "
-                             "Use attributes.PASSIVE_NO_INITIALIZE")
-        passive = PASSIVE_NO_INITIALIZE
-    elif passive is False:
-        util.warn_deprecated("Passing False for 'passive' is "
-                             "deprecated.  Use attributes.PASSIVE_OFF")
-        passive = PASSIVE_OFF
-
-    return get_state_history(instance_state(obj), key, passive)
-
-
-def get_state_history(state, key, passive=PASSIVE_OFF):
-    return state.get_history(key, passive)
-
-
-def has_parent(cls, obj, key, optimistic=False):
-    """TODO"""
-    manager = manager_of_class(cls)
-    state = instance_state(obj)
-    return manager.has_parent(state, key, optimistic)
-
-
-def register_attribute(class_, key, **kw):
-    comparator = kw.pop('comparator', None)
-    parententity = kw.pop('parententity', None)
-    doc = kw.pop('doc', None)
-    desc = register_descriptor(class_, key,
-                               comparator, parententity, doc=doc)
-    register_attribute_impl(class_, key, **kw)
-    return desc
-
-
-def register_attribute_impl(class_, key,
-                            uselist=False, callable_=None,
-                            useobject=False,
-                            impl_class=None, backref=None, **kw):
-
-    manager = manager_of_class(class_)
-    if uselist:
-        factory = kw.pop('typecallable', None)
-        typecallable = manager.instrument_collection_class(
-            key, factory or list)
-    else:
-        typecallable = kw.pop('typecallable', None)
-
-    dispatch = manager[key].dispatch
-
-    if impl_class:
-        impl = impl_class(class_, key, typecallable, dispatch, **kw)
-    elif uselist:
-        impl = CollectionAttributeImpl(class_, key, callable_, dispatch,
-                                       typecallable=typecallable, **kw)
-    elif useobject:
-        impl = ScalarObjectAttributeImpl(class_, key, callable_,
-                                         dispatch, **kw)
-    else:
-        impl = ScalarAttributeImpl(class_, key, callable_, dispatch, **kw)
-
-    manager[key].impl = impl
-
-    if backref:
-        backref_listeners(manager[key], backref, uselist)
-
-    manager.post_configure_attribute(key)
-    return manager[key]
-
-
-def register_descriptor(class_, key, comparator=None,
-                        parententity=None, doc=None):
-    manager = manager_of_class(class_)
-
-    descriptor = InstrumentedAttribute(class_, key, comparator=comparator,
-                                       parententity=parententity)
-
-    descriptor.__doc__ = doc
-
-    manager.instrument_attribute(key, descriptor)
-    return descriptor
-
-
-def unregister_attribute(class_, key):
-    manager_of_class(class_).uninstrument_attribute(key)
-
-
-def init_collection(obj, key):
-    """Initialize a collection attribute and return the collection adapter.
-
-    This function is used to provide direct access to collection internals
-    for a previously unloaded attribute.  e.g.::
-
-        collection_adapter = init_collection(someobject, 'elements')
-        for elem in values:
-            collection_adapter.append_without_event(elem)
-
-    For an easier way to do the above, see
-    :func:`~sqlalchemy.orm.attributes.set_committed_value`.
-
-    obj is an instrumented object instance.  An InstanceState
-    is accepted directly for backwards compatibility but
-    this usage is deprecated.
-
-    """
-    state = instance_state(obj)
-    dict_ = state.dict
-    return init_state_collection(state, dict_, key)
-
-
-def init_state_collection(state, dict_, key):
-    """Initialize a collection attribute and return the collection adapter."""
-
-    attr = state.manager[key].impl
-    user_data = attr.initialize(state, dict_)
-    return attr.get_collection(state, dict_, user_data)
-
-
-def set_committed_value(instance, key, value):
-    """Set the value of an attribute with no history events.
-
-    Cancels any previous history present.  The value should be
-    a scalar value for scalar-holding attributes, or
-    an iterable for any collection-holding attribute.
-
-    This is the same underlying method used when a lazy loader
-    fires off and loads additional data from the database.
-    In particular, this method can be used by application code
-    which has loaded additional attributes or collections through
-    separate queries, which can then be attached to an instance
-    as though it were part of its original loaded state.
-
-    """
-    state, dict_ = instance_state(instance), instance_dict(instance)
-    state.manager[key].impl.set_committed_value(state, dict_, value)
-
-
-def set_attribute(instance, key, value):
-    """Set the value of an attribute, firing history events.
-
-    This function may be used regardless of instrumentation
-    applied directly to the class, i.e. no descriptors are required.
-    Custom attribute management schemes will need to make usage
-    of this method to establish attribute state as understood
-    by SQLAlchemy.
-
-    """
-    state, dict_ = instance_state(instance), instance_dict(instance)
-    state.manager[key].impl.set(state, dict_, value, None)
-
-
-def get_attribute(instance, key):
-    """Get the value of an attribute, firing any callables required.
-
-    This function may be used regardless of instrumentation
-    applied directly to the class, i.e. no descriptors are required.
-    Custom attribute management schemes will need to make usage
-    of this method to make usage of attribute state as understood
-    by SQLAlchemy.
-
-    """
-    state, dict_ = instance_state(instance), instance_dict(instance)
-    return state.manager[key].impl.get(state, dict_)
-
-
-def del_attribute(instance, key):
-    """Delete the value of an attribute, firing history events.
-
-    This function may be used regardless of instrumentation
-    applied directly to the class, i.e. no descriptors are required.
-    Custom attribute management schemes will need to make usage
-    of this method to establish attribute state as understood
-    by SQLAlchemy.
-
-    """
-    state, dict_ = instance_state(instance), instance_dict(instance)
-    state.manager[key].impl.delete(state, dict_)
-
-
-def flag_modified(instance, key):
-    """Mark an attribute on an instance as 'modified'.
-
-    This sets the 'modified' flag on the instance and
-    establishes an unconditional change event for the given attribute.
-
-    """
-    state, dict_ = instance_state(instance), instance_dict(instance)
-    impl = state.manager[key].impl
-    state._modified_event(dict_, impl, NO_VALUE, force=True)
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_modules/sqlalchemy/sql/sqltypes.html b/docs/_modules/sqlalchemy/sql/sqltypes.html deleted file mode 100644 index 35e27e4..0000000 --- a/docs/_modules/sqlalchemy/sql/sqltypes.html +++ /dev/null @@ -1,2738 +0,0 @@ - - - - - - - sqlalchemy.sql.sqltypes — arXiv submission & moderation 0.1 documentation - - - - - - - - - - - - - - - - - - -
-
-
-
- -

Source code for sqlalchemy.sql.sqltypes

-# sql/sqltypes.py
-# Copyright (C) 2005-2017 the SQLAlchemy authors and contributors
-# <see AUTHORS file>
-#
-# This module is part of SQLAlchemy and is released under
-# the MIT License: http://www.opensource.org/licenses/mit-license.php
-
-"""SQL specific types.
-
-"""
-
-import datetime as dt
-import codecs
-import collections
-import json
-
-from . import elements
-from .type_api import TypeEngine, TypeDecorator, to_instance, Variant
-from .elements import quoted_name, TypeCoerce as type_coerce, _defer_name, \
-    Slice, _literal_as_binds
-from .. import exc, util, processors
-from .base import _bind_or_error, SchemaEventTarget
-from . import operators
-from .. import inspection
-from .. import event
-from ..util import pickle
-from ..util import compat
-import decimal
-
-if util.jython:
-    import array
-
-
-class _DateAffinity(object):
-
-    """Mixin date/time specific expression adaptations.
-
-    Rules are implemented within Date,Time,Interval,DateTime, Numeric,
-    Integer. Based on http://www.postgresql.org/docs/current/static
-    /functions-datetime.html.
-
-    """
-
-    @property
-    def _expression_adaptations(self):
-        raise NotImplementedError()
-
-    class Comparator(TypeEngine.Comparator):
-        _blank_dict = util.immutabledict()
-
-        def _adapt_expression(self, op, other_comparator):
-            othertype = other_comparator.type._type_affinity
-            return (
-                op, to_instance(
-                    self.type._expression_adaptations.
-                    get(op, self._blank_dict).
-                    get(othertype, NULLTYPE))
-            )
-    comparator_factory = Comparator
-
-
-class Concatenable(object):
-
-    """A mixin that marks a type as supporting 'concatenation',
-    typically strings."""
-
-    class Comparator(TypeEngine.Comparator):
-
-        def _adapt_expression(self, op, other_comparator):
-            if (op is operators.add and
-                    isinstance(
-                        other_comparator,
-                        (Concatenable.Comparator, NullType.Comparator)
-                    )):
-                return operators.concat_op, self.expr.type
-            else:
-                return super(Concatenable.Comparator, self)._adapt_expression(
-                    op, other_comparator)
-
-    comparator_factory = Comparator
-
-
-class Indexable(object):
-    """A mixin that marks a type as supporting indexing operations,
-    such as array or JSON structures.
-
-
-    .. versionadded:: 1.1.0
-
-
-    """
-
-    class Comparator(TypeEngine.Comparator):
-
-        def _setup_getitem(self, index):
-            raise NotImplementedError()
-
-        def __getitem__(self, index):
-            adjusted_op, adjusted_right_expr, result_type = \
-                self._setup_getitem(index)
-            return self.operate(
-                adjusted_op,
-                adjusted_right_expr,
-                result_type=result_type
-            )
-
-    comparator_factory = Comparator
-
-
-class String(Concatenable, TypeEngine):
-
-    """The base for all string and character types.
-
-    In SQL, corresponds to VARCHAR.  Can also take Python unicode objects
-    and encode to the database's encoding in bind params (and the reverse for
-    result sets.)
-
-    The `length` field is usually required when the `String` type is
-    used within a CREATE TABLE statement, as VARCHAR requires a length
-    on most databases.
-
-    """
-
-    __visit_name__ = 'string'
-
-    def __init__(self, length=None, collation=None,
-                 convert_unicode=False,
-                 unicode_error=None,
-                 _warn_on_bytestring=False
-                 ):
-        """
-        Create a string-holding type.
-
-        :param length: optional, a length for the column for use in
-          DDL and CAST expressions.  May be safely omitted if no ``CREATE
-          TABLE`` will be issued.  Certain databases may require a
-          ``length`` for use in DDL, and will raise an exception when
-          the ``CREATE TABLE`` DDL is issued if a ``VARCHAR``
-          with no length is included.  Whether the value is
-          interpreted as bytes or characters is database specific.
-
-        :param collation: Optional, a column-level collation for
-          use in DDL and CAST expressions.  Renders using the
-          COLLATE keyword supported by SQLite, MySQL, and PostgreSQL.
-          E.g.::
-
-            >>> from sqlalchemy import cast, select, String
-            >>> print select([cast('some string', String(collation='utf8'))])
-            SELECT CAST(:param_1 AS VARCHAR COLLATE utf8) AS anon_1
-
-          .. versionadded:: 0.8 Added support for COLLATE to all
-             string types.
-
-        :param convert_unicode: When set to ``True``, the
-          :class:`.String` type will assume that
-          input is to be passed as Python ``unicode`` objects,
-          and results returned as Python ``unicode`` objects.
-          If the DBAPI in use does not support Python unicode
-          (which is fewer and fewer these days), SQLAlchemy
-          will encode/decode the value, using the
-          value of the ``encoding`` parameter passed to
-          :func:`.create_engine` as the encoding.
-
-          When using a DBAPI that natively supports Python
-          unicode objects, this flag generally does not
-          need to be set.  For columns that are explicitly
-          intended to store non-ASCII data, the :class:`.Unicode`
-          or :class:`.UnicodeText`
-          types should be used regardless, which feature
-          the same behavior of ``convert_unicode`` but
-          also indicate an underlying column type that
-          directly supports unicode, such as ``NVARCHAR``.
-
-          For the extremely rare case that Python ``unicode``
-          is to be encoded/decoded by SQLAlchemy on a backend
-          that does natively support Python ``unicode``,
-          the value ``force`` can be passed here which will
-          cause SQLAlchemy's encode/decode services to be
-          used unconditionally.
-
-        :param unicode_error: Optional, a method to use to handle Unicode
-          conversion errors. Behaves like the ``errors`` keyword argument to
-          the standard library's ``string.decode()`` functions.   This flag
-          requires that ``convert_unicode`` is set to ``force`` - otherwise,
-          SQLAlchemy is not guaranteed to handle the task of unicode
-          conversion.   Note that this flag adds significant performance
-          overhead to row-fetching operations for backends that already
-          return unicode objects natively (which most DBAPIs do).  This
-          flag should only be used as a last resort for reading
-          strings from a column with varied or corrupted encodings.
-
-        """
-        if unicode_error is not None and convert_unicode != 'force':
-            raise exc.ArgumentError("convert_unicode must be 'force' "
-                                    "when unicode_error is set.")
-
-        self.length = length
-        self.collation = collation
-        self.convert_unicode = convert_unicode
-        self.unicode_error = unicode_error
-        self._warn_on_bytestring = _warn_on_bytestring
-
-    def literal_processor(self, dialect):
-        def process(value):
-            value = value.replace("'", "''")
-            return "'%s'" % value
-        return process
-
-    def bind_processor(self, dialect):
-        if self.convert_unicode or dialect.convert_unicode:
-            if dialect.supports_unicode_binds and \
-                    self.convert_unicode != 'force':
-                if self._warn_on_bytestring:
-                    def process(value):
-                        if isinstance(value, util.binary_type):
-                            util.warn_limited(
-                                "Unicode type received non-unicode "
-                                "bind param value %r.",
-                                (util.ellipses_string(value),))
-                        return value
-                    return process
-                else:
-                    return None
-            else:
-                encoder = codecs.getencoder(dialect.encoding)
-                warn_on_bytestring = self._warn_on_bytestring
-
-                def process(value):
-                    if isinstance(value, util.text_type):
-                        return encoder(value, self.unicode_error)[0]
-                    elif warn_on_bytestring and value is not None:
-                        util.warn_limited(
-                            "Unicode type received non-unicode bind "
-                            "param value %r.",
-                            (util.ellipses_string(value),))
-                    return value
-            return process
-        else:
-            return None
-
-    def result_processor(self, dialect, coltype):
-        wants_unicode = self.convert_unicode or dialect.convert_unicode
-        needs_convert = wants_unicode and \
-            (dialect.returns_unicode_strings is not True or
-             self.convert_unicode in ('force', 'force_nocheck'))
-        needs_isinstance = (
-            needs_convert and
-            dialect.returns_unicode_strings and
-            self.convert_unicode != 'force_nocheck'
-        )
-        if needs_convert:
-            if needs_isinstance:
-                return processors.to_conditional_unicode_processor_factory(
-                    dialect.encoding, self.unicode_error)
-            else:
-                return processors.to_unicode_processor_factory(
-                    dialect.encoding, self.unicode_error)
-        else:
-            return None
-
-    @property
-    def python_type(self):
-        if self.convert_unicode:
-            return util.text_type
-        else:
-            return str
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.STRING
-
-
-class Text(String):
-
-    """A variably sized string type.
-
-    In SQL, usually corresponds to CLOB or TEXT. Can also take Python
-    unicode objects and encode to the database's encoding in bind
-    params (and the reverse for result sets.)  In general, TEXT objects
-    do not have a length; while some databases will accept a length
-    argument here, it will be rejected by others.
-
-    """
-    __visit_name__ = 'text'
-
-
-class Unicode(String):
-
-    """A variable length Unicode string type.
-
-    The :class:`.Unicode` type is a :class:`.String` subclass
-    that assumes input and output as Python ``unicode`` data,
-    and in that regard is equivalent to the usage of the
-    ``convert_unicode`` flag with the :class:`.String` type.
-    However, unlike plain :class:`.String`, it also implies an
-    underlying column type that is explicitly supporting of non-ASCII
-    data, such as ``NVARCHAR`` on Oracle and SQL Server.
-    This can impact the output of ``CREATE TABLE`` statements
-    and ``CAST`` functions at the dialect level, and can
-    also affect the handling of bound parameters in some
-    specific DBAPI scenarios.
-
-    The encoding used by the :class:`.Unicode` type is usually
-    determined by the DBAPI itself; most modern DBAPIs
-    feature support for Python ``unicode`` objects as bound
-    values and result set values, and the encoding should
-    be configured as detailed in the notes for the target
-    DBAPI in the :ref:`dialect_toplevel` section.
-
-    For those DBAPIs which do not support, or are not configured
-    to accommodate Python ``unicode`` objects
-    directly, SQLAlchemy does the encoding and decoding
-    outside of the DBAPI.   The encoding in this scenario
-    is determined by the ``encoding`` flag passed to
-    :func:`.create_engine`.
-
-    When using the :class:`.Unicode` type, it is only appropriate
-    to pass Python ``unicode`` objects, and not plain ``str``.
-    If a plain ``str`` is passed under Python 2, a warning
-    is emitted.  If you notice your application emitting these warnings but
-    you're not sure of the source of them, the Python
-    ``warnings`` filter, documented at
-    http://docs.python.org/library/warnings.html,
-    can be used to turn these warnings into exceptions
-    which will illustrate a stack trace::
-
-      import warnings
-      warnings.simplefilter('error')
-
-    For an application that wishes to pass plain bytestrings
-    and Python ``unicode`` objects to the ``Unicode`` type
-    equally, the bytestrings must first be decoded into
-    unicode.  The recipe at :ref:`coerce_to_unicode` illustrates
-    how this is done.
-
-    See also:
-
-        :class:`.UnicodeText` - unlengthed textual counterpart
-        to :class:`.Unicode`.
-
-    """
-
-    __visit_name__ = 'unicode'
-
-    def __init__(self, length=None, **kwargs):
-        """
-        Create a :class:`.Unicode` object.
-
-        Parameters are the same as that of :class:`.String`,
-        with the exception that ``convert_unicode``
-        defaults to ``True``.
-
-        """
-        kwargs.setdefault('convert_unicode', True)
-        kwargs.setdefault('_warn_on_bytestring', True)
-        super(Unicode, self).__init__(length=length, **kwargs)
-
-
-class UnicodeText(Text):
-
-    """An unbounded-length Unicode string type.
-
-    See :class:`.Unicode` for details on the unicode
-    behavior of this object.
-
-    Like :class:`.Unicode`, usage the :class:`.UnicodeText` type implies a
-    unicode-capable type being used on the backend, such as
-    ``NCLOB``, ``NTEXT``.
-
-    """
-
-    __visit_name__ = 'unicode_text'
-
-    def __init__(self, length=None, **kwargs):
-        """
-        Create a Unicode-converting Text type.
-
-        Parameters are the same as that of :class:`.Text`,
-        with the exception that ``convert_unicode``
-        defaults to ``True``.
-
-        """
-        kwargs.setdefault('convert_unicode', True)
-        kwargs.setdefault('_warn_on_bytestring', True)
-        super(UnicodeText, self).__init__(length=length, **kwargs)
-
-
-class Integer(_DateAffinity, TypeEngine):
-
-    """A type for ``int`` integers."""
-
-    __visit_name__ = 'integer'
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.NUMBER
-
-    @property
-    def python_type(self):
-        return int
-
-    def literal_processor(self, dialect):
-        def process(value):
-            return str(value)
-        return process
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        # TODO: need a dictionary object that will
-        # handle operators generically here, this is incomplete
-        return {
-            operators.add: {
-                Date: Date,
-                Integer: self.__class__,
-                Numeric: Numeric,
-            },
-            operators.mul: {
-                Interval: Interval,
-                Integer: self.__class__,
-                Numeric: Numeric,
-            },
-            operators.div: {
-                Integer: self.__class__,
-                Numeric: Numeric,
-            },
-            operators.truediv: {
-                Integer: self.__class__,
-                Numeric: Numeric,
-            },
-            operators.sub: {
-                Integer: self.__class__,
-                Numeric: Numeric,
-            },
-        }
-
-
-class SmallInteger(Integer):
-
-    """A type for smaller ``int`` integers.
-
-    Typically generates a ``SMALLINT`` in DDL, and otherwise acts like
-    a normal :class:`.Integer` on the Python side.
-
-    """
-
-    __visit_name__ = 'small_integer'
-
-
-class BigInteger(Integer):
-
-    """A type for bigger ``int`` integers.
-
-    Typically generates a ``BIGINT`` in DDL, and otherwise acts like
-    a normal :class:`.Integer` on the Python side.
-
-    """
-
-    __visit_name__ = 'big_integer'
-
-
-class Numeric(_DateAffinity, TypeEngine):
-
-    """A type for fixed precision numbers, such as ``NUMERIC`` or ``DECIMAL``.
-
-    This type returns Python ``decimal.Decimal`` objects by default, unless
-    the :paramref:`.Numeric.asdecimal` flag is set to False, in which case
-    they are coerced to Python ``float`` objects.
-
-    .. note::
-
-        The :class:`.Numeric` type is designed to receive data from a database
-        type that is explicitly known to be a decimal type
-        (e.g. ``DECIMAL``, ``NUMERIC``, others) and not a floating point
-        type (e.g. ``FLOAT``, ``REAL``, others).
-        If the database column on the server is in fact a floating-point type
-        type, such as ``FLOAT`` or ``REAL``, use the :class:`.Float`
-        type or a subclass, otherwise numeric coercion between
-        ``float``/``Decimal`` may or may not function as expected.
-
-    .. note::
-
-       The Python ``decimal.Decimal`` class is generally slow
-       performing; cPython 3.3 has now switched to use the `cdecimal
-       <http://pypi.python.org/pypi/cdecimal/>`_ library natively. For
-       older Python versions, the ``cdecimal`` library can be patched
-       into any application where it will replace the ``decimal``
-       library fully, however this needs to be applied globally and
-       before any other modules have been imported, as follows::
-
-           import sys
-           import cdecimal
-           sys.modules["decimal"] = cdecimal
-
-       Note that the ``cdecimal`` and ``decimal`` libraries are **not
-       compatible with each other**, so patching ``cdecimal`` at the
-       global level is the only way it can be used effectively with
-       various DBAPIs that hardcode to import the ``decimal`` library.
-
-    """
-
-    __visit_name__ = 'numeric'
-
-    _default_decimal_return_scale = 10
-
-    def __init__(self, precision=None, scale=None,
-                 decimal_return_scale=None, asdecimal=True):
-        """
-        Construct a Numeric.
-
-        :param precision: the numeric precision for use in DDL ``CREATE
-          TABLE``.
-
-        :param scale: the numeric scale for use in DDL ``CREATE TABLE``.
-
-        :param asdecimal: default True.  Return whether or not
-          values should be sent as Python Decimal objects, or
-          as floats.   Different DBAPIs send one or the other based on
-          datatypes - the Numeric type will ensure that return values
-          are one or the other across DBAPIs consistently.
-
-        :param decimal_return_scale: Default scale to use when converting
-         from floats to Python decimals.  Floating point values will typically
-         be much longer due to decimal inaccuracy, and most floating point
-         database types don't have a notion of "scale", so by default the
-         float type looks for the first ten decimal places when converting.
-         Specfiying this value will override that length.  Types which
-         do include an explicit ".scale" value, such as the base
-         :class:`.Numeric` as well as the MySQL float types, will use the
-         value of ".scale" as the default for decimal_return_scale, if not
-         otherwise specified.
-
-         .. versionadded:: 0.9.0
-
-        When using the ``Numeric`` type, care should be taken to ensure
-        that the asdecimal setting is apppropriate for the DBAPI in use -
-        when Numeric applies a conversion from Decimal->float or float->
-        Decimal, this conversion incurs an additional performance overhead
-        for all result columns received.
-
-        DBAPIs that return Decimal natively (e.g. psycopg2) will have
-        better accuracy and higher performance with a setting of ``True``,
-        as the native translation to Decimal reduces the amount of floating-
-        point issues at play, and the Numeric type itself doesn't need
-        to apply any further conversions.  However, another DBAPI which
-        returns floats natively *will* incur an additional conversion
-        overhead, and is still subject to floating point data loss - in
-        which case ``asdecimal=False`` will at least remove the extra
-        conversion overhead.
-
-        """
-        self.precision = precision
-        self.scale = scale
-        self.decimal_return_scale = decimal_return_scale
-        self.asdecimal = asdecimal
-
-    @property
-    def _effective_decimal_return_scale(self):
-        if self.decimal_return_scale is not None:
-            return self.decimal_return_scale
-        elif getattr(self, "scale", None) is not None:
-            return self.scale
-        else:
-            return self._default_decimal_return_scale
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.NUMBER
-
-    def literal_processor(self, dialect):
-        def process(value):
-            return str(value)
-        return process
-
-    @property
-    def python_type(self):
-        if self.asdecimal:
-            return decimal.Decimal
-        else:
-            return float
-
-    def bind_processor(self, dialect):
-        if dialect.supports_native_decimal:
-            return None
-        else:
-            return processors.to_float
-
-    def result_processor(self, dialect, coltype):
-        if self.asdecimal:
-            if dialect.supports_native_decimal:
-                # we're a "numeric", DBAPI will give us Decimal directly
-                return None
-            else:
-                util.warn('Dialect %s+%s does *not* support Decimal '
-                          'objects natively, and SQLAlchemy must '
-                          'convert from floating point - rounding '
-                          'errors and other issues may occur. Please '
-                          'consider storing Decimal numbers as strings '
-                          'or integers on this platform for lossless '
-                          'storage.' % (dialect.name, dialect.driver))
-
-                # we're a "numeric", DBAPI returns floats, convert.
-                return processors.to_decimal_processor_factory(
-                    decimal.Decimal,
-                    self.scale if self.scale is not None
-                    else self._default_decimal_return_scale)
-        else:
-            if dialect.supports_native_decimal:
-                return processors.to_float
-            else:
-                return None
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.mul: {
-                Interval: Interval,
-                Numeric: self.__class__,
-                Integer: self.__class__,
-            },
-            operators.div: {
-                Numeric: self.__class__,
-                Integer: self.__class__,
-            },
-            operators.truediv: {
-                Numeric: self.__class__,
-                Integer: self.__class__,
-            },
-            operators.add: {
-                Numeric: self.__class__,
-                Integer: self.__class__,
-            },
-            operators.sub: {
-                Numeric: self.__class__,
-                Integer: self.__class__,
-            }
-        }
-
-
-class Float(Numeric):
-
-    """Type representing floating point types, such as ``FLOAT`` or ``REAL``.
-
-    This type returns Python ``float`` objects by default, unless the
-    :paramref:`.Float.asdecimal` flag is set to True, in which case they
-    are coerced to ``decimal.Decimal`` objects.
-
-    .. note::
-
-        The :class:`.Float` type is designed to receive data from a database
-        type that is explicitly known to be a floating point type
-        (e.g. ``FLOAT``, ``REAL``, others)
-        and not a decimal type (e.g. ``DECIMAL``, ``NUMERIC``, others).
-        If the database column on the server is in fact a Numeric
-        type, such as ``DECIMAL`` or ``NUMERIC``, use the :class:`.Numeric`
-        type or a subclass, otherwise numeric coercion between
-        ``float``/``Decimal`` may or may not function as expected.
-
-    """
-
-    __visit_name__ = 'float'
-
-    scale = None
-
-    def __init__(self, precision=None, asdecimal=False,
-                 decimal_return_scale=None, **kwargs):
-        r"""
-        Construct a Float.
-
-        :param precision: the numeric precision for use in DDL ``CREATE
-           TABLE``.
-
-        :param asdecimal: the same flag as that of :class:`.Numeric`, but
-          defaults to ``False``.   Note that setting this flag to ``True``
-          results in floating point conversion.
-
-        :param decimal_return_scale: Default scale to use when converting
-         from floats to Python decimals.  Floating point values will typically
-         be much longer due to decimal inaccuracy, and most floating point
-         database types don't have a notion of "scale", so by default the
-         float type looks for the first ten decimal places when converting.
-         Specfiying this value will override that length.  Note that the
-         MySQL float types, which do include "scale", will use "scale"
-         as the default for decimal_return_scale, if not otherwise specified.
-
-         .. versionadded:: 0.9.0
-
-        :param \**kwargs: deprecated.  Additional arguments here are ignored
-         by the default :class:`.Float` type.  For database specific
-         floats that support additional arguments, see that dialect's
-         documentation for details, such as
-         :class:`sqlalchemy.dialects.mysql.FLOAT`.
-
-        """
-        self.precision = precision
-        self.asdecimal = asdecimal
-        self.decimal_return_scale = decimal_return_scale
-        if kwargs:
-            util.warn_deprecated("Additional keyword arguments "
-                                 "passed to Float ignored.")
-
-    def result_processor(self, dialect, coltype):
-        if self.asdecimal:
-            return processors.to_decimal_processor_factory(
-                decimal.Decimal,
-                self._effective_decimal_return_scale)
-        else:
-            return None
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.mul: {
-                Interval: Interval,
-                Numeric: self.__class__,
-            },
-            operators.div: {
-                Numeric: self.__class__,
-            },
-            operators.truediv: {
-                Numeric: self.__class__,
-            },
-            operators.add: {
-                Numeric: self.__class__,
-            },
-            operators.sub: {
-                Numeric: self.__class__,
-            }
-        }
-
-
-class DateTime(_DateAffinity, TypeEngine):
-
-    """A type for ``datetime.datetime()`` objects.
-
-    Date and time types return objects from the Python ``datetime``
-    module.  Most DBAPIs have built in support for the datetime
-    module, with the noted exception of SQLite.  In the case of
-    SQLite, date and time types are stored as strings which are then
-    converted back to datetime objects when rows are returned.
-
-    For the time representation within the datetime type, some
-    backends include additional options, such as timezone support and
-    fractional seconds support.  For fractional seconds, use the
-    dialect-specific datatype, such as :class:`.mysql.TIME`.  For
-    timezone support, use at least the :class:`~.types.TIMESTAMP` datatype,
-    if not the dialect-specific datatype object.
-
-    """
-
-    __visit_name__ = 'datetime'
-
-    def __init__(self, timezone=False):
-        """Construct a new :class:`.DateTime`.
-
-        :param timezone: boolean.  Indicates that the datetime type should
-         enable timezone support, if available on the
-         **base date/time-holding type only**.   It is recommended
-         to make use of the :class:`~.types.TIMESTAMP` datatype directly when
-         using this flag, as some databases include separate generic
-         date/time-holding types distinct from the timezone-capable
-         TIMESTAMP datatype, such as Oracle.
-
-
-        """
-        self.timezone = timezone
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.DATETIME
-
-    @property
-    def python_type(self):
-        return dt.datetime
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.add: {
-                Interval: self.__class__,
-            },
-            operators.sub: {
-                Interval: self.__class__,
-                DateTime: Interval,
-            },
-        }
-
-
-class Date(_DateAffinity, TypeEngine):
-
-    """A type for ``datetime.date()`` objects."""
-
-    __visit_name__ = 'date'
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.DATETIME
-
-    @property
-    def python_type(self):
-        return dt.date
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.add: {
-                Integer: self.__class__,
-                Interval: DateTime,
-                Time: DateTime,
-            },
-            operators.sub: {
-                # date - integer = date
-                Integer: self.__class__,
-
-                # date - date = integer.
-                Date: Integer,
-
-                Interval: DateTime,
-
-                # date - datetime = interval,
-                # this one is not in the PG docs
-                # but works
-                DateTime: Interval,
-            },
-        }
-
-
-class Time(_DateAffinity, TypeEngine):
-
-    """A type for ``datetime.time()`` objects."""
-
-    __visit_name__ = 'time'
-
-    def __init__(self, timezone=False):
-        self.timezone = timezone
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.DATETIME
-
-    @property
-    def python_type(self):
-        return dt.time
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.add: {
-                Date: DateTime,
-                Interval: self.__class__
-            },
-            operators.sub: {
-                Time: Interval,
-                Interval: self.__class__,
-            },
-        }
-
-
-class _Binary(TypeEngine):
-
-    """Define base behavior for binary types."""
-
-    def __init__(self, length=None):
-        self.length = length
-
-    def literal_processor(self, dialect):
-        def process(value):
-            value = value.decode(dialect.encoding).replace("'", "''")
-            return "'%s'" % value
-        return process
-
-    @property
-    def python_type(self):
-        return util.binary_type
-
-    # Python 3 - sqlite3 doesn't need the `Binary` conversion
-    # here, though pg8000 does to indicate "bytea"
-    def bind_processor(self, dialect):
-        if dialect.dbapi is None:
-            return None
-
-        DBAPIBinary = dialect.dbapi.Binary
-
-        def process(value):
-            if value is not None:
-                return DBAPIBinary(value)
-            else:
-                return None
-        return process
-
-    # Python 3 has native bytes() type
-    # both sqlite3 and pg8000 seem to return it,
-    # psycopg2 as of 2.5 returns 'memoryview'
-    if util.py2k:
-        def result_processor(self, dialect, coltype):
-            if util.jython:
-                def process(value):
-                    if value is not None:
-                        if isinstance(value, array.array):
-                            return value.tostring()
-                        return str(value)
-                    else:
-                        return None
-            else:
-                process = processors.to_str
-            return process
-    else:
-        def result_processor(self, dialect, coltype):
-            def process(value):
-                if value is not None:
-                    value = bytes(value)
-                return value
-            return process
-
-    def coerce_compared_value(self, op, value):
-        """See :meth:`.TypeEngine.coerce_compared_value` for a description."""
-
-        if isinstance(value, util.string_types):
-            return self
-        else:
-            return super(_Binary, self).coerce_compared_value(op, value)
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.BINARY
-
-
-class LargeBinary(_Binary):
-
-    """A type for large binary byte data.
-
-    The :class:`.LargeBinary` type corresponds to a large and/or unlengthed
-    binary type for the target platform, such as BLOB on MySQL and BYTEA for
-    PostgreSQL.  It also handles the necessary conversions for the DBAPI.
-
-    """
-
-    __visit_name__ = 'large_binary'
-
-    def __init__(self, length=None):
-        """
-        Construct a LargeBinary type.
-
-        :param length: optional, a length for the column for use in
-          DDL statements, for those binary types that accept a length,
-          such as the MySQL BLOB type.
-
-        """
-        _Binary.__init__(self, length=length)
-
-
-class Binary(LargeBinary):
-
-    """Deprecated.  Renamed to LargeBinary."""
-
-    def __init__(self, *arg, **kw):
-        util.warn_deprecated('The Binary type has been renamed to '
-                             'LargeBinary.')
-        LargeBinary.__init__(self, *arg, **kw)
-
-
-class SchemaType(SchemaEventTarget):
-
-    """Mark a type as possibly requiring schema-level DDL for usage.
-
-    Supports types that must be explicitly created/dropped (i.e. PG ENUM type)
-    as well as types that are complimented by table or schema level
-    constraints, triggers, and other rules.
-
-    :class:`.SchemaType` classes can also be targets for the
-    :meth:`.DDLEvents.before_parent_attach` and
-    :meth:`.DDLEvents.after_parent_attach` events, where the events fire off
-    surrounding the association of the type object with a parent
-    :class:`.Column`.
-
-    .. seealso::
-
-        :class:`.Enum`
-
-        :class:`.Boolean`
-
-
-    """
-
-    def __init__(self, name=None, schema=None, metadata=None,
-                 inherit_schema=False, quote=None, _create_events=True):
-        if name is not None:
-            self.name = quoted_name(name, quote)
-        else:
-            self.name = None
-        self.schema = schema
-        self.metadata = metadata
-        self.inherit_schema = inherit_schema
-        self._create_events = _create_events
-
-        if _create_events and self.metadata:
-            event.listen(
-                self.metadata,
-                "before_create",
-                util.portable_instancemethod(self._on_metadata_create)
-            )
-            event.listen(
-                self.metadata,
-                "after_drop",
-                util.portable_instancemethod(self._on_metadata_drop)
-            )
-
-    def _translate_schema(self, effective_schema, map_):
-        return map_.get(effective_schema, effective_schema)
-
-    def _set_parent(self, column):
-        column._on_table_attach(util.portable_instancemethod(self._set_table))
-
-    def _variant_mapping_for_set_table(self, column):
-        if isinstance(column.type, Variant):
-            variant_mapping = column.type.mapping.copy()
-            variant_mapping['_default'] = column.type.impl
-        else:
-            variant_mapping = None
-        return variant_mapping
-
-    def _set_table(self, column, table):
-        if self.inherit_schema:
-            self.schema = table.schema
-
-        if not self._create_events:
-            return
-
-        variant_mapping = self._variant_mapping_for_set_table(column)
-
-        event.listen(
-            table,
-            "before_create",
-            util.portable_instancemethod(
-                self._on_table_create,
-                {"variant_mapping": variant_mapping})
-        )
-        event.listen(
-            table,
-            "after_drop",
-            util.portable_instancemethod(
-                self._on_table_drop,
-                {"variant_mapping": variant_mapping})
-        )
-        if self.metadata is None:
-            # TODO: what's the difference between self.metadata
-            # and table.metadata here ?
-            event.listen(
-                table.metadata,
-                "before_create",
-                util.portable_instancemethod(
-                    self._on_metadata_create,
-                    {"variant_mapping": variant_mapping})
-            )
-            event.listen(
-                table.metadata,
-                "after_drop",
-                util.portable_instancemethod(
-                    self._on_metadata_drop,
-                    {"variant_mapping": variant_mapping})
-            )
-
-    def copy(self, **kw):
-        return self.adapt(self.__class__, _create_events=True)
-
-    def adapt(self, impltype, **kw):
-        schema = kw.pop('schema', self.schema)
-        metadata = kw.pop('metadata', self.metadata)
-        _create_events = kw.pop('_create_events', False)
-
-        return impltype(name=self.name,
-                        schema=schema,
-                        inherit_schema=self.inherit_schema,
-                        metadata=metadata,
-                        _create_events=_create_events,
-                        **kw)
-
-    @property
-    def bind(self):
-        return self.metadata and self.metadata.bind or None
-
-    def create(self, bind=None, checkfirst=False):
-        """Issue CREATE ddl for this type, if applicable."""
-
-        if bind is None:
-            bind = _bind_or_error(self)
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t.create(bind=bind, checkfirst=checkfirst)
-
-    def drop(self, bind=None, checkfirst=False):
-        """Issue DROP ddl for this type, if applicable."""
-
-        if bind is None:
-            bind = _bind_or_error(self)
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t.drop(bind=bind, checkfirst=checkfirst)
-
-    def _on_table_create(self, target, bind, **kw):
-        if not self._is_impl_for_variant(bind.dialect, kw):
-            return
-
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t._on_table_create(target, bind, **kw)
-
-    def _on_table_drop(self, target, bind, **kw):
-        if not self._is_impl_for_variant(bind.dialect, kw):
-            return
-
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t._on_table_drop(target, bind, **kw)
-
-    def _on_metadata_create(self, target, bind, **kw):
-        if not self._is_impl_for_variant(bind.dialect, kw):
-            return
-
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t._on_metadata_create(target, bind, **kw)
-
-    def _on_metadata_drop(self, target, bind, **kw):
-        if not self._is_impl_for_variant(bind.dialect, kw):
-            return
-
-        t = self.dialect_impl(bind.dialect)
-        if t.__class__ is not self.__class__ and isinstance(t, SchemaType):
-            t._on_metadata_drop(target, bind, **kw)
-
-    def _is_impl_for_variant(self, dialect, kw):
-        variant_mapping = kw.pop('variant_mapping', None)
-        if variant_mapping is None:
-            return True
-
-        if dialect.name in variant_mapping and \
-                variant_mapping[dialect.name] is self:
-            return True
-        elif dialect.name not in variant_mapping:
-            return variant_mapping['_default'] is self
-
-
-class Enum(String, SchemaType):
-
-    """Generic Enum Type.
-
-    The :class:`.Enum` type provides a set of possible string values
-    which the column is constrained towards.
-
-    The :class:`.Enum` type will make use of the backend's native "ENUM"
-    type if one is available; otherwise, it uses a VARCHAR datatype and
-    produces a CHECK constraint.  Use of the backend-native enum type
-    can be disabled using the :paramref:`.Enum.native_enum` flag, and
-    the production of the CHECK constraint is configurable using the
-    :paramref:`.Enum.create_constraint` flag.
-
-    The :class:`.Enum` type also provides in-Python validation of string
-    values during both read and write operations.  When reading a value
-    from the database in a result set, the string value is always checked
-    against the list of possible values and a ``LookupError`` is raised
-    if no match is found.  When passing a value to the database as a
-    plain string within a SQL statement, if the
-    :paramref:`.Enum.validate_strings` parameter is
-    set to True, a ``LookupError`` is raised for any string value that's
-    not located in the given list of possible values; note that this
-    impacts usage of LIKE expressions with enumerated values (an unusual
-    use case).
-
-    .. versionchanged:: 1.1 the :class:`.Enum` type now provides in-Python
-       validation of input values as well as on data being returned by
-       the database.
-
-    The source of enumerated values may be a list of string values, or
-    alternatively a PEP-435-compliant enumerated class.  For the purposes
-    of the :class:`.Enum` datatype, this class need only provide a
-    ``__members__`` method.
-
-    When using an enumerated class, the enumerated objects are used
-    both for input and output, rather than strings as is the case with
-    a plain-string enumerated type::
-
-        import enum
-        class MyEnum(enum.Enum):
-            one = 1
-            two = 2
-            three = 3
-
-
-        t = Table(
-            'data', MetaData(),
-            Column('value', Enum(MyEnum))
-        )
-
-        connection.execute(t.insert(), {"value": MyEnum.two})
-        assert connection.scalar(t.select()) is MyEnum.two
-
-    Above, the string names of each element, e.g. "one", "two", "three",
-    are persisted to the database; the values of the Python Enum, here
-    indicated as integers, are **not** used; the value of each enum can
-    therefore be any kind of Python object whether or not it is persistable.
-
-    .. versionadded:: 1.1 - support for PEP-435-style enumerated
-       classes.
-
-
-    .. seealso::
-
-        :class:`~.postgresql.ENUM` - PostgreSQL-specific type,
-        which has additional functionality.
-
-    """
-
-    __visit_name__ = 'enum'
-
-    def __init__(self, *enums, **kw):
-        r"""Construct an enum.
-
-        Keyword arguments which don't apply to a specific backend are ignored
-        by that backend.
-
-        :param \*enums: either exactly one PEP-435 compliant enumerated type
-           or one or more string or unicode enumeration labels. If unicode
-           labels are present, the `convert_unicode` flag is auto-enabled.
-
-           .. versionadded:: 1.1 a PEP-435 style enumerated class may be
-              passed.
-
-        :param convert_unicode: Enable unicode-aware bind parameter and
-           result-set processing for this Enum's data. This is set
-           automatically based on the presence of unicode label strings.
-
-        :param create_constraint: defaults to True.  When creating a non-native
-           enumerated type, also build a CHECK constraint on the database
-           against the valid values.
-
-           .. versionadded:: 1.1 - added :paramref:`.Enum.create_constraint`
-              which provides the option to disable the production of the
-              CHECK constraint for a non-native enumerated type.
-
-        :param metadata: Associate this type directly with a ``MetaData``
-           object. For types that exist on the target database as an
-           independent schema construct (PostgreSQL), this type will be
-           created and dropped within ``create_all()`` and ``drop_all()``
-           operations. If the type is not associated with any ``MetaData``
-           object, it will associate itself with each ``Table`` in which it is
-           used, and will be created when any of those individual tables are
-           created, after a check is performed for its existence. The type is
-           only dropped when ``drop_all()`` is called for that ``Table``
-           object's metadata, however.
-
-        :param name: The name of this type. This is required for PostgreSQL
-           and any future supported database which requires an explicitly
-           named type, or an explicitly named constraint in order to generate
-           the type and/or a table that uses it. If a PEP-435 enumerated
-           class was used, its name (converted to lower case) is used by
-           default.
-
-        :param native_enum: Use the database's native ENUM type when
-           available. Defaults to True. When False, uses VARCHAR + check
-           constraint for all backends.
-
-        :param schema: Schema name of this type. For types that exist on the
-           target database as an independent schema construct (PostgreSQL),
-           this parameter specifies the named schema in which the type is
-           present.
-
-           .. note::
-
-                The ``schema`` of the :class:`.Enum` type does not
-                by default make use of the ``schema`` established on the
-                owning :class:`.Table`.  If this behavior is desired,
-                set the ``inherit_schema`` flag to ``True``.
-
-        :param quote: Set explicit quoting preferences for the type's name.
-
-        :param inherit_schema: When ``True``, the "schema" from the owning
-           :class:`.Table` will be copied to the "schema" attribute of this
-           :class:`.Enum`, replacing whatever value was passed for the
-           ``schema`` attribute.   This also takes effect when using the
-           :meth:`.Table.tometadata` operation.
-
-        :param validate_strings: when True, string values that are being
-           passed to the database in a SQL statement will be checked
-           for validity against the list of enumerated values.  Unrecognized
-           values will result in a ``LookupError`` being raised.
-
-           .. versionadded:: 1.1.0b2
-
-        """
-
-        values, objects = self._parse_into_values(enums, kw)
-        self._setup_for_values(values, objects, kw)
-
-        self.native_enum = kw.pop('native_enum', True)
-        convert_unicode = kw.pop('convert_unicode', None)
-        self.create_constraint = kw.pop('create_constraint', True)
-        self.validate_strings = kw.pop('validate_strings', False)
-
-        if convert_unicode is None:
-            for e in self.enums:
-                if isinstance(e, util.text_type):
-                    convert_unicode = True
-                    break
-            else:
-                convert_unicode = False
-
-        if self.enums:
-            length = max(len(x) for x in self.enums)
-        else:
-            length = 0
-        self._valid_lookup[None] = self._object_lookup[None] = None
-
-        String.__init__(self,
-                        length=length,
-                        convert_unicode=convert_unicode,
-                        )
-        SchemaType.__init__(self, **kw)
-
-    def _parse_into_values(self, enums, kw):
-        if len(enums) == 1 and hasattr(enums[0], '__members__'):
-            self.enum_class = enums[0]
-            values = list(self.enum_class.__members__)
-            objects = [self.enum_class.__members__[k] for k in values]
-            kw.setdefault('name', self.enum_class.__name__.lower())
-
-            return values, objects
-        else:
-            self.enum_class = None
-            return enums, enums
-
-    def _setup_for_values(self, values, objects, kw):
-        self.enums = list(values)
-
-        self._valid_lookup = dict(
-            zip(objects, values)
-        )
-        self._object_lookup = dict(
-            (value, key) for key, value in self._valid_lookup.items()
-        )
-        self._valid_lookup.update(
-            [(value, value) for value in self._valid_lookup.values()]
-        )
-
-    def _db_value_for_elem(self, elem):
-        try:
-            return self._valid_lookup[elem]
-        except KeyError:
-            # for unknown string values, we return as is.  While we can
-            # validate these if we wanted, that does not allow for lesser-used
-            # end-user use cases, such as using a LIKE comparison with an enum,
-            # or for an application that wishes to apply string tests to an
-            # ENUM (see [ticket:3725]).  While we can decide to differentiate
-            # here between an INSERT statement and a criteria used in a SELECT,
-            # for now we're staying conservative w/ behavioral changes (perhaps
-            # someone has a trigger that handles strings on INSERT)
-            if not self.validate_strings and \
-                    isinstance(elem, compat.string_types):
-                return elem
-            else:
-                raise LookupError(
-                    '"%s" is not among the defined enum values' % elem)
-
-    class Comparator(String.Comparator):
-
-        def _adapt_expression(self, op, other_comparator):
-            op, typ = super(Enum.Comparator, self)._adapt_expression(
-                op, other_comparator)
-            if op is operators.concat_op:
-                typ = String(
-                    self.type.length,
-                    convert_unicode=self.type.convert_unicode)
-            return op, typ
-
-    comparator_factory = Comparator
-
-    def _object_value_for_elem(self, elem):
-        try:
-            return self._object_lookup[elem]
-        except KeyError:
-            raise LookupError(
-                '"%s" is not among the defined enum values' % elem)
-
-    def __repr__(self):
-        return util.generic_repr(self,
-                                 additional_kw=[('native_enum', True)],
-                                 to_inspect=[Enum, SchemaType],
-                                 )
-
-    def _should_create_constraint(self, compiler, **kw):
-        if not self._is_impl_for_variant(compiler.dialect, kw):
-            return False
-        return not self.native_enum or \
-            not compiler.dialect.supports_native_enum
-
-    @util.dependencies("sqlalchemy.sql.schema")
-    def _set_table(self, schema, column, table):
-        if self.native_enum:
-            SchemaType._set_table(self, column, table)
-
-        if not self.create_constraint:
-            return
-
-        variant_mapping = self._variant_mapping_for_set_table(column)
-
-        e = schema.CheckConstraint(
-            type_coerce(column, self).in_(self.enums),
-            name=_defer_name(self.name),
-            _create_rule=util.portable_instancemethod(
-                self._should_create_constraint,
-                {"variant_mapping": variant_mapping}),
-            _type_bound=True
-        )
-        assert e.table is table
-
-    def copy(self, **kw):
-        return SchemaType.copy(self, **kw)
-
-    def adapt(self, impltype, **kw):
-        schema = kw.pop('schema', self.schema)
-        metadata = kw.pop('metadata', self.metadata)
-        _create_events = kw.pop('_create_events', False)
-        if issubclass(impltype, Enum):
-            if self.enum_class is not None:
-                args = [self.enum_class]
-            else:
-                args = self.enums
-            return impltype(name=self.name,
-                            schema=schema,
-                            metadata=metadata,
-                            convert_unicode=self.convert_unicode,
-                            native_enum=self.native_enum,
-                            inherit_schema=self.inherit_schema,
-                            validate_strings=self.validate_strings,
-                            _create_events=_create_events,
-                            *args,
-                            **kw)
-        else:
-            # TODO: why would we be here?
-            return super(Enum, self).adapt(impltype, **kw)
-
-    def literal_processor(self, dialect):
-        parent_processor = super(Enum, self).literal_processor(dialect)
-
-        def process(value):
-            value = self._db_value_for_elem(value)
-            if parent_processor:
-                value = parent_processor(value)
-            return value
-        return process
-
-    def bind_processor(self, dialect):
-        def process(value):
-            value = self._db_value_for_elem(value)
-            if parent_processor:
-                value = parent_processor(value)
-            return value
-
-        parent_processor = super(Enum, self).bind_processor(dialect)
-        return process
-
-    def result_processor(self, dialect, coltype):
-        parent_processor = super(Enum, self).result_processor(
-            dialect, coltype)
-
-        def process(value):
-            if parent_processor:
-                value = parent_processor(value)
-
-            value = self._object_value_for_elem(value)
-            return value
-
-        return process
-
-    @property
-    def python_type(self):
-        if self.enum_class:
-            return self.enum_class
-        else:
-            return super(Enum, self).python_type
-
-
-class PickleType(TypeDecorator):
-    """Holds Python objects, which are serialized using pickle.
-
-    PickleType builds upon the Binary type to apply Python's
-    ``pickle.dumps()`` to incoming objects, and ``pickle.loads()`` on
-    the way out, allowing any pickleable Python object to be stored as
-    a serialized binary field.
-
-    To allow ORM change events to propagate for elements associated
-    with :class:`.PickleType`, see :ref:`mutable_toplevel`.
-
-    """
-
-    impl = LargeBinary
-
-    def __init__(self, protocol=pickle.HIGHEST_PROTOCOL,
-                 pickler=None, comparator=None):
-        """
-        Construct a PickleType.
-
-        :param protocol: defaults to ``pickle.HIGHEST_PROTOCOL``.
-
-        :param pickler: defaults to cPickle.pickle or pickle.pickle if
-          cPickle is not available.  May be any object with
-          pickle-compatible ``dumps` and ``loads`` methods.
-
-        :param comparator: a 2-arg callable predicate used
-          to compare values of this type.  If left as ``None``,
-          the Python "equals" operator is used to compare values.
-
-        """
-        self.protocol = protocol
-        self.pickler = pickler or pickle
-        self.comparator = comparator
-        super(PickleType, self).__init__()
-
-    def __reduce__(self):
-        return PickleType, (self.protocol,
-                            None,
-                            self.comparator)
-
-    def bind_processor(self, dialect):
-        impl_processor = self.impl.bind_processor(dialect)
-        dumps = self.pickler.dumps
-        protocol = self.protocol
-        if impl_processor:
-            def process(value):
-                if value is not None:
-                    value = dumps(value, protocol)
-                return impl_processor(value)
-        else:
-            def process(value):
-                if value is not None:
-                    value = dumps(value, protocol)
-                return value
-        return process
-
-    def result_processor(self, dialect, coltype):
-        impl_processor = self.impl.result_processor(dialect, coltype)
-        loads = self.pickler.loads
-        if impl_processor:
-            def process(value):
-                value = impl_processor(value)
-                if value is None:
-                    return None
-                return loads(value)
-        else:
-            def process(value):
-                if value is None:
-                    return None
-                return loads(value)
-        return process
-
-    def compare_values(self, x, y):
-        if self.comparator:
-            return self.comparator(x, y)
-        else:
-            return x == y
-
-
-class Boolean(TypeEngine, SchemaType):
-
-    """A bool datatype.
-
-    Boolean typically uses BOOLEAN or SMALLINT on the DDL side, and on
-    the Python side deals in ``True`` or ``False``.
-
-    """
-
-    __visit_name__ = 'boolean'
-
-    def __init__(
-            self, create_constraint=True, name=None, _create_events=True):
-        """Construct a Boolean.
-
-        :param create_constraint: defaults to True.  If the boolean
-          is generated as an int/smallint, also create a CHECK constraint
-          on the table that ensures 1 or 0 as a value.
-
-        :param name: if a CHECK constraint is generated, specify
-          the name of the constraint.
-
-        """
-        self.create_constraint = create_constraint
-        self.name = name
-        self._create_events = _create_events
-
-    def _should_create_constraint(self, compiler, **kw):
-        if not self._is_impl_for_variant(compiler.dialect, kw):
-            return False
-        return not compiler.dialect.supports_native_boolean
-
-    @util.dependencies("sqlalchemy.sql.schema")
-    def _set_table(self, schema, column, table):
-        if not self.create_constraint:
-            return
-
-        variant_mapping = self._variant_mapping_for_set_table(column)
-
-        e = schema.CheckConstraint(
-            type_coerce(column, self).in_([0, 1]),
-            name=_defer_name(self.name),
-            _create_rule=util.portable_instancemethod(
-                self._should_create_constraint,
-                {"variant_mapping": variant_mapping}),
-            _type_bound=True
-        )
-        assert e.table is table
-
-    @property
-    def python_type(self):
-        return bool
-
-    def literal_processor(self, dialect):
-        if dialect.supports_native_boolean:
-            def process(value):
-                return "true" if value else "false"
-        else:
-            def process(value):
-                return str(1 if value else 0)
-        return process
-
-    def bind_processor(self, dialect):
-        if dialect.supports_native_boolean:
-            return None
-        else:
-            return processors.boolean_to_int
-
-    def result_processor(self, dialect, coltype):
-        if dialect.supports_native_boolean:
-            return None
-        else:
-            return processors.int_to_boolean
-
-
-class Interval(_DateAffinity, TypeDecorator):
-
-    """A type for ``datetime.timedelta()`` objects.
-
-    The Interval type deals with ``datetime.timedelta`` objects.  In
-    PostgreSQL, the native ``INTERVAL`` type is used; for others, the
-    value is stored as a date which is relative to the "epoch"
-    (Jan. 1, 1970).
-
-    Note that the ``Interval`` type does not currently provide date arithmetic
-    operations on platforms which do not support interval types natively. Such
-    operations usually require transformation of both sides of the expression
-    (such as, conversion of both sides into integer epoch values first) which
-    currently is a manual procedure (such as via
-    :attr:`~sqlalchemy.sql.expression.func`).
-
-    """
-
-    impl = DateTime
-    epoch = dt.datetime.utcfromtimestamp(0)
-
-    def __init__(self, native=True,
-                 second_precision=None,
-                 day_precision=None):
-        """Construct an Interval object.
-
-        :param native: when True, use the actual
-          INTERVAL type provided by the database, if
-          supported (currently PostgreSQL, Oracle).
-          Otherwise, represent the interval data as
-          an epoch value regardless.
-
-        :param second_precision: For native interval types
-          which support a "fractional seconds precision" parameter,
-          i.e. Oracle and PostgreSQL
-
-        :param day_precision: for native interval types which
-          support a "day precision" parameter, i.e. Oracle.
-
-        """
-        super(Interval, self).__init__()
-        self.native = native
-        self.second_precision = second_precision
-        self.day_precision = day_precision
-
-    def adapt(self, cls, **kw):
-        if self.native and hasattr(cls, '_adapt_from_generic_interval'):
-            return cls._adapt_from_generic_interval(self, **kw)
-        else:
-            return self.__class__(
-                native=self.native,
-                second_precision=self.second_precision,
-                day_precision=self.day_precision,
-                **kw)
-
-    @property
-    def python_type(self):
-        return dt.timedelta
-
-    def bind_processor(self, dialect):
-        impl_processor = self.impl.bind_processor(dialect)
-        epoch = self.epoch
-        if impl_processor:
-            def process(value):
-                if value is not None:
-                    value = epoch + value
-                return impl_processor(value)
-        else:
-            def process(value):
-                if value is not None:
-                    value = epoch + value
-                return value
-        return process
-
-    def result_processor(self, dialect, coltype):
-        impl_processor = self.impl.result_processor(dialect, coltype)
-        epoch = self.epoch
-        if impl_processor:
-            def process(value):
-                value = impl_processor(value)
-                if value is None:
-                    return None
-                return value - epoch
-        else:
-            def process(value):
-                if value is None:
-                    return None
-                return value - epoch
-        return process
-
-    @util.memoized_property
-    def _expression_adaptations(self):
-        return {
-            operators.add: {
-                Date: DateTime,
-                Interval: self.__class__,
-                DateTime: DateTime,
-                Time: Time,
-            },
-            operators.sub: {
-                Interval: self.__class__
-            },
-            operators.mul: {
-                Numeric: self.__class__
-            },
-            operators.truediv: {
-                Numeric: self.__class__
-            },
-            operators.div: {
-                Numeric: self.__class__
-            }
-        }
-
-    @property
-    def _type_affinity(self):
-        return Interval
-
-    def coerce_compared_value(self, op, value):
-        """See :meth:`.TypeEngine.coerce_compared_value` for a description."""
-
-        return self.impl.coerce_compared_value(op, value)
-
-
-class JSON(Indexable, TypeEngine):
-    """Represent a SQL JSON type.
-
-    .. note::  :class:`.types.JSON` is provided as a facade for vendor-specific
-       JSON types.  Since it supports JSON SQL operations, it only
-       works on backends that have an actual JSON type, currently
-       PostgreSQL as well as certain versions of MySQL.
-
-    :class:`.types.JSON` is part of the Core in support of the growing
-    popularity of native JSON datatypes.
-
-    The :class:`.types.JSON` type stores arbitrary JSON format data, e.g.::
-
-        data_table = Table('data_table', metadata,
-            Column('id', Integer, primary_key=True),
-            Column('data', JSON)
-        )
-
-        with engine.connect() as conn:
-            conn.execute(
-                data_table.insert(),
-                data = {"key1": "value1", "key2": "value2"}
-            )
-
-    The base :class:`.types.JSON` provides these two operations:
-
-    * Keyed index operations::
-
-        data_table.c.data['some key']
-
-    * Integer index operations::
-
-        data_table.c.data[3]
-
-    * Path index operations::
-
-        data_table.c.data[('key_1', 'key_2', 5, ..., 'key_n')]
-
-    Additional operations are available from the dialect-specific versions
-    of :class:`.types.JSON`, such as :class:`.postgresql.JSON` and
-    :class:`.postgresql.JSONB`, each of which offer more operators than
-    just the basic type.
-
-    Index operations return an expression object whose type defaults to
-    :class:`.JSON` by default, so that further JSON-oriented instructions
-    may be called upon the result type.   Note that there are backend-specific
-    idiosyncracies here, including that the Postgresql database does not generally
-    compare a "json" to a "json" structure without type casts.  These idiosyncracies
-    can be accommodated in a backend-neutral way by by making explicit use
-    of the :func:`.cast` and :func:`.type_coerce` constructs.
-    Comparison of specific index elements of a :class:`.JSON` object
-    to other objects work best if the **left hand side is CAST to a string**
-    and the **right hand side is rendered as a json string**; a future SQLAlchemy
-    feature such as a generic "astext" modifier may simplify this at some point:
-
-    * **Compare an element of a JSON structure to a string**::
-
-        from sqlalchemy import cast, type_coerce
-        from sqlalchemy import String, JSON
-
-        cast(
-            data_table.c.data['some_key'], String
-        ) == '"some_value"'
-
-        cast(
-            data_table.c.data['some_key'], String
-        ) == type_coerce("some_value", JSON)
-
-    * **Compare an element of a JSON structure to an integer**::
-
-        from sqlalchemy import cast, type_coerce
-        from sqlalchemy import String, JSON
-
-        cast(data_table.c.data['some_key'], String) == '55'
-
-        cast(
-            data_table.c.data['some_key'], String
-        ) == type_coerce(55, JSON)
-
-    * **Compare an element of a JSON structure to some other JSON structure** - note
-      that Python dictionaries are typically not ordered so care should be taken
-      here to assert that the JSON structures are identical::
-
-        from sqlalchemy import cast, type_coerce
-        from sqlalchemy import String, JSON
-        import json
-
-        cast(
-            data_table.c.data['some_key'], String
-        ) == json.dumps({"foo": "bar"})
-
-        cast(
-            data_table.c.data['some_key'], String
-        ) == type_coerce({"foo": "bar"}, JSON)
-
-    The :class:`.JSON` type, when used with the SQLAlchemy ORM, does not
-    detect in-place mutations to the structure.  In order to detect these, the
-    :mod:`sqlalchemy.ext.mutable` extension must be used.  This extension will
-    allow "in-place" changes to the datastructure to produce events which
-    will be detected by the unit of work.  See the example at :class:`.HSTORE`
-    for a simple example involving a dictionary.
-
-    When working with NULL values, the :class:`.JSON` type recommends the
-    use of two specific constants in order to differentiate between a column
-    that evaluates to SQL NULL, e.g. no value, vs. the JSON-encoded string
-    of ``"null"``.   To insert or select against a value that is SQL NULL,
-    use the constant :func:`.null`::
-
-        from sqlalchemy import null
-        conn.execute(table.insert(), json_value=null())
-
-    To insert or select against a value that is JSON ``"null"``, use the
-    constant :attr:`.JSON.NULL`::
-
-        conn.execute(table.insert(), json_value=JSON.NULL)
-
-    The :class:`.JSON` type supports a flag
-    :paramref:`.JSON.none_as_null` which when set to True will result
-    in the Python constant ``None`` evaluating to the value of SQL
-    NULL, and when set to False results in the Python constant
-    ``None`` evaluating to the value of JSON ``"null"``.    The Python
-    value ``None`` may be used in conjunction with either
-    :attr:`.JSON.NULL` and :func:`.null` in order to indicate NULL
-    values, but care must be taken as to the value of the
-    :paramref:`.JSON.none_as_null` in these cases.
-
-    .. seealso::
-
-        :class:`.postgresql.JSON`
-
-        :class:`.postgresql.JSONB`
-
-        :class:`.mysql.JSON`
-
-    .. versionadded:: 1.1
-
-
-    """
-    __visit_name__ = 'JSON'
-
-    hashable = False
-    NULL = util.symbol('JSON_NULL')
-    """Describe the json value of NULL.
-
-    This value is used to force the JSON value of ``"null"`` to be
-    used as the value.   A value of Python ``None`` will be recognized
-    either as SQL NULL or JSON ``"null"``, based on the setting
-    of the :paramref:`.JSON.none_as_null` flag; the :attr:`.JSON.NULL`
-    constant can be used to always resolve to JSON ``"null"`` regardless
-    of this setting.  This is in contrast to the :func:`.sql.null` construct,
-    which always resolves to SQL NULL.  E.g.::
-
-        from sqlalchemy import null
-        from sqlalchemy.dialects.postgresql import JSON
-
-        obj1 = MyObject(json_value=null())  # will *always* insert SQL NULL
-        obj2 = MyObject(json_value=JSON.NULL)  # will *always* insert JSON string "null"
-
-        session.add_all([obj1, obj2])
-        session.commit()
-
-    In order to set JSON NULL as a default value for a column, the most
-    transparent method is to use :func:`.text`::
-
-        Table(
-            'my_table', metadata,
-            Column('json_data', JSON, default=text("'null'"))
-        )
-
-    While it is possible to use :attr:`.JSON.NULL` in this context, the
-    :attr:`.JSON.NULL` value will be returned as the value of the column,
-    which in the context of the ORM or other repurposing of the default
-    value, may not be desirable.  Using a SQL expression means the value
-    will be re-fetched from the database within the context of retrieving
-    generated defaults.
-
-
-    """
-
-    def __init__(self, none_as_null=False):
-        """Construct a :class:`.types.JSON` type.
-
-        :param none_as_null=False: if True, persist the value ``None`` as a
-         SQL NULL value, not the JSON encoding of ``null``.   Note that
-         when this flag is False, the :func:`.null` construct can still
-         be used to persist a NULL value::
-
-             from sqlalchemy import null
-             conn.execute(table.insert(), data=null())
-
-         .. note::
-
-              :paramref:`.JSON.none_as_null` does **not** apply to the
-              values passed to :paramref:`.Column.default` and
-              :paramref:`.Column.server_default`; a value of ``None`` passed for
-              these parameters means "no default present".
-
-         .. seealso::
-
-              :attr:`.types.JSON.NULL`
-
-         """
-        self.none_as_null = none_as_null
-
-    class JSONElementType(TypeEngine):
-        """common function for index / path elements in a JSON expression."""
-
-        _integer = Integer()
-        _string = String()
-
-        def string_bind_processor(self, dialect):
-            return self._string._cached_bind_processor(dialect)
-
-        def string_literal_processor(self, dialect):
-            return self._string._cached_literal_processor(dialect)
-
-        def bind_processor(self, dialect):
-            int_processor = self._integer._cached_bind_processor(dialect)
-            string_processor = self.string_bind_processor(dialect)
-
-            def process(value):
-                if int_processor and isinstance(value, int):
-                    value = int_processor(value)
-                elif string_processor and isinstance(value, util.string_types):
-                    value = string_processor(value)
-                return value
-
-            return process
-
-        def literal_processor(self, dialect):
-            int_processor = self._integer._cached_literal_processor(dialect)
-            string_processor = self.string_literal_processor(dialect)
-
-            def process(value):
-                if int_processor and isinstance(value, int):
-                    value = int_processor(value)
-                elif string_processor and isinstance(value, util.string_types):
-                    value = string_processor(value)
-                return value
-
-            return process
-
-    class JSONIndexType(JSONElementType):
-        """Placeholder for the datatype of a JSON index value.
-
-        This allows execution-time processing of JSON index values
-        for special syntaxes.
-
-        """
-
-    class JSONPathType(JSONElementType):
-        """Placeholder type for JSON path operations.
-
-        This allows execution-time processing of a path-based
-        index value into a specific SQL syntax.
-
-        """
-
-    class Comparator(Indexable.Comparator, Concatenable.Comparator):
-        """Define comparison operations for :class:`.types.JSON`."""
-
-        @util.dependencies('sqlalchemy.sql.default_comparator')
-        def _setup_getitem(self, default_comparator, index):
-            if not isinstance(index, util.string_types) and \
-                    isinstance(index, collections.Sequence):
-                index = default_comparator._check_literal(
-                    self.expr, operators.json_path_getitem_op,
-                    index, bindparam_type=JSON.JSONPathType
-                )
-
-                operator = operators.json_path_getitem_op
-            else:
-                index = default_comparator._check_literal(
-                    self.expr, operators.json_getitem_op,
-                    index, bindparam_type=JSON.JSONIndexType
-                )
-                operator = operators.json_getitem_op
-
-            return operator, index, self.type
-
-    comparator_factory = Comparator
-
-    @property
-    def python_type(self):
-        return dict
-
-    @property
-    def should_evaluate_none(self):
-        return not self.none_as_null
-
-    @util.memoized_property
-    def _str_impl(self):
-        return String(convert_unicode=True)
-
-    def bind_processor(self, dialect):
-        string_process = self._str_impl.bind_processor(dialect)
-
-        json_serializer = dialect._json_serializer or json.dumps
-
-        def process(value):
-            if value is self.NULL:
-                value = None
-            elif isinstance(value, elements.Null) or (
-                value is None and self.none_as_null
-            ):
-                return None
-
-            serialized = json_serializer(value)
-            if string_process:
-                serialized = string_process(serialized)
-            return serialized
-
-        return process
-
-    def result_processor(self, dialect, coltype):
-        string_process = self._str_impl.result_processor(dialect, coltype)
-        json_deserializer = dialect._json_deserializer or json.loads
-
-        def process(value):
-            if value is None:
-                return None
-            if string_process:
-                value = string_process(value)
-            return json_deserializer(value)
-        return process
-
-
-class ARRAY(Indexable, Concatenable, TypeEngine):
-    """Represent a SQL Array type.
-
-    .. note::  This type serves as the basis for all ARRAY operations.
-       However, currently **only the PostgreSQL backend has support
-       for SQL arrays in SQLAlchemy**.  It is recommended to use the
-       :class:`.postgresql.ARRAY` type directly when using ARRAY types
-       with PostgreSQL, as it provides additional operators specific
-       to that backend.
-
-    :class:`.types.ARRAY` is part of the Core in support of various SQL standard
-    functions such as :class:`.array_agg` which explicitly involve arrays;
-    however, with the exception of the PostgreSQL backend and possibly
-    some third-party dialects, no other SQLAlchemy built-in dialect has
-    support for this type.
-
-    An :class:`.types.ARRAY` type is constructed given the "type"
-    of element::
-
-        mytable = Table("mytable", metadata,
-                Column("data", ARRAY(Integer))
-            )
-
-    The above type represents an N-dimensional array,
-    meaning a supporting backend such as PostgreSQL will interpret values
-    with any number of dimensions automatically.   To produce an INSERT
-    construct that passes in a 1-dimensional array of integers::
-
-        connection.execute(
-                mytable.insert(),
-                data=[1,2,3]
-        )
-
-    The :class:`.types.ARRAY` type can be constructed given a fixed number
-    of dimensions::
-
-        mytable = Table("mytable", metadata,
-                Column("data", ARRAY(Integer, dimensions=2))
-            )
-
-    Sending a number of dimensions is optional, but recommended if the
-    datatype is to represent arrays of more than one dimension.  This number
-    is used:
-
-    * When emitting the type declaration itself to the database, e.g.
-      ``INTEGER[][]``
-
-    * When translating Python values to database values, and vice versa, e.g.
-      an ARRAY of :class:`.Unicode` objects uses this number to efficiently
-      access the string values inside of array structures without resorting
-      to per-row type inspection
-
-    * When used with the Python ``getitem`` accessor, the number of dimensions
-      serves to define the kind of type that the ``[]`` operator should
-      return, e.g. for an ARRAY of INTEGER with two dimensions::
-
-            >>> expr = table.c.column[5]  # returns ARRAY(Integer, dimensions=1)
-            >>> expr = expr[6]  # returns Integer
-
-    For 1-dimensional arrays, an :class:`.types.ARRAY` instance with no
-    dimension parameter will generally assume single-dimensional behaviors.
-
-    SQL expressions of type :class:`.types.ARRAY` have support for "index" and
-    "slice" behavior.  The Python ``[]`` operator works normally here, given
-    integer indexes or slices.  Arrays default to 1-based indexing.
-    The operator produces binary expression
-    constructs which will produce the appropriate SQL, both for
-    SELECT statements::
-
-        select([mytable.c.data[5], mytable.c.data[2:7]])
-
-    as well as UPDATE statements when the :meth:`.Update.values` method
-    is used::
-
-        mytable.update().values({
-            mytable.c.data[5]: 7,
-            mytable.c.data[2:7]: [1, 2, 3]
-        })
-
-    The :class:`.types.ARRAY` type also provides for the operators
-    :meth:`.types.ARRAY.Comparator.any` and :meth:`.types.ARRAY.Comparator.all`.
-    The PostgreSQL-specific version of :class:`.types.ARRAY` also provides additional
-    operators.
-
-    .. versionadded:: 1.1.0
-
-    .. seealso::
-
-        :class:`.postgresql.ARRAY`
-
-    """
-    __visit_name__ = 'ARRAY'
-
-    zero_indexes = False
-    """if True, Python zero-based indexes should be interpreted as one-based
-    on the SQL expression side."""
-
-    class Comparator(Indexable.Comparator, Concatenable.Comparator):
-
-        """Define comparison operations for :class:`.types.ARRAY`.
-
-        More operators are available on the dialect-specific form
-        of this type.  See :class:`.postgresql.ARRAY.Comparator`.
-
-        """
-
-        def _setup_getitem(self, index):
-            if isinstance(index, slice):
-                return_type = self.type
-                if self.type.zero_indexes:
-                    index = slice(
-                        index.start + 1,
-                        index.stop + 1,
-                        index.step
-                    )
-                index = Slice(
-                    _literal_as_binds(
-                        index.start, name=self.expr.key,
-                        type_=type_api.INTEGERTYPE),
-                    _literal_as_binds(
-                        index.stop, name=self.expr.key,
-                        type_=type_api.INTEGERTYPE),
-                    _literal_as_binds(
-                        index.step, name=self.expr.key,
-                        type_=type_api.INTEGERTYPE)
-                )
-            else:
-                if self.type.zero_indexes:
-                    index += 1
-                if self.type.dimensions is None or self.type.dimensions == 1:
-                    return_type = self.type.item_type
-                else:
-                    adapt_kw = {'dimensions': self.type.dimensions - 1}
-                    return_type = self.type.adapt(
-                        self.type.__class__, **adapt_kw)
-
-            return operators.getitem, index, return_type
-
-        @util.dependencies("sqlalchemy.sql.elements")
-        def any(self, elements, other, operator=None):
-            """Return ``other operator ANY (array)`` clause.
-
-            Argument places are switched, because ANY requires array
-            expression to be on the right hand-side.
-
-            E.g.::
-
-                from sqlalchemy.sql import operators
-
-                conn.execute(
-                    select([table.c.data]).where(
-                            table.c.data.any(7, operator=operators.lt)
-                        )
-                )
-
-            :param other: expression to be compared
-            :param operator: an operator object from the
-             :mod:`sqlalchemy.sql.operators`
-             package, defaults to :func:`.operators.eq`.
-
-            .. seealso::
-
-                :func:`.sql.expression.any_`
-
-                :meth:`.types.ARRAY.Comparator.all`
-
-            """
-            operator = operator if operator else operators.eq
-            return operator(
-                elements._literal_as_binds(other),
-                elements.CollectionAggregate._create_any(self.expr)
-            )
-
-        @util.dependencies("sqlalchemy.sql.elements")
-        def all(self, elements, other, operator=None):
-            """Return ``other operator ALL (array)`` clause.
-
-            Argument places are switched, because ALL requires array
-            expression to be on the right hand-side.
-
-            E.g.::
-
-                from sqlalchemy.sql import operators
-
-                conn.execute(
-                    select([table.c.data]).where(
-                            table.c.data.all(7, operator=operators.lt)
-                        )
-                )
-
-            :param other: expression to be compared
-            :param operator: an operator object from the
-             :mod:`sqlalchemy.sql.operators`
-             package, defaults to :func:`.operators.eq`.
-
-            .. seealso::
-
-                :func:`.sql.expression.all_`
-
-                :meth:`.types.ARRAY.Comparator.any`
-
-            """
-            operator = operator if operator else operators.eq
-            return operator(
-                elements._literal_as_binds(other),
-                elements.CollectionAggregate._create_all(self.expr)
-            )
-
-    comparator_factory = Comparator
-
-    def __init__(self, item_type, as_tuple=False, dimensions=None,
-                 zero_indexes=False):
-        """Construct an :class:`.types.ARRAY`.
-
-        E.g.::
-
-          Column('myarray', ARRAY(Integer))
-
-        Arguments are:
-
-        :param item_type: The data type of items of this array. Note that
-          dimensionality is irrelevant here, so multi-dimensional arrays like
-          ``INTEGER[][]``, are constructed as ``ARRAY(Integer)``, not as
-          ``ARRAY(ARRAY(Integer))`` or such.
-
-        :param as_tuple=False: Specify whether return results
-          should be converted to tuples from lists.  This parameter is
-          not generally needed as a Python list corresponds well
-          to a SQL array.
-
-        :param dimensions: if non-None, the ARRAY will assume a fixed
-         number of dimensions.   This impacts how the array is declared
-         on the database, how it goes about interpreting Python and
-         result values, as well as how expression behavior in conjunction
-         with the "getitem" operator works.  See the description at
-         :class:`.types.ARRAY` for additional detail.
-
-        :param zero_indexes=False: when True, index values will be converted
-         between Python zero-based and SQL one-based indexes, e.g.
-         a value of one will be added to all index values before passing
-         to the database.
-
-        """
-        if isinstance(item_type, ARRAY):
-            raise ValueError("Do not nest ARRAY types; ARRAY(basetype) "
-                             "handles multi-dimensional arrays of basetype")
-        if isinstance(item_type, type):
-            item_type = item_type()
-        self.item_type = item_type
-        self.as_tuple = as_tuple
-        self.dimensions = dimensions
-        self.zero_indexes = zero_indexes
-
-    @property
-    def hashable(self):
-        return self.as_tuple
-
-    @property
-    def python_type(self):
-        return list
-
-    def compare_values(self, x, y):
-        return x == y
-
-
-class REAL(Float):
-
-    """The SQL REAL type."""
-
-    __visit_name__ = 'REAL'
-
-
-class FLOAT(Float):
-
-    """The SQL FLOAT type."""
-
-    __visit_name__ = 'FLOAT'
-
-
-class NUMERIC(Numeric):
-
-    """The SQL NUMERIC type."""
-
-    __visit_name__ = 'NUMERIC'
-
-
-class DECIMAL(Numeric):
-
-    """The SQL DECIMAL type."""
-
-    __visit_name__ = 'DECIMAL'
-
-
-class INTEGER(Integer):
-
-    """The SQL INT or INTEGER type."""
-
-    __visit_name__ = 'INTEGER'
-INT = INTEGER
-
-
-class SMALLINT(SmallInteger):
-
-    """The SQL SMALLINT type."""
-
-    __visit_name__ = 'SMALLINT'
-
-
-class BIGINT(BigInteger):
-
-    """The SQL BIGINT type."""
-
-    __visit_name__ = 'BIGINT'
-
-
-class TIMESTAMP(DateTime):
-
-    """The SQL TIMESTAMP type.
-
-    :class:`~.types.TIMESTAMP` datatypes have support for timezone
-    storage on some backends, such as PostgreSQL and Oracle.  Use the
-    :paramref:`~types.TIMESTAMP.timezone` argument in order to enable
-    "TIMESTAMP WITH TIMEZONE" for these backends.
-
-    """
-
-    __visit_name__ = 'TIMESTAMP'
-
-    def __init__(self, timezone=False):
-        """Construct a new :class:`.TIMESTAMP`.
-
-        :param timezone: boolean.  Indicates that the TIMESTAMP type should
-         enable timezone support, if available on the target database.
-         On a per-dialect basis is similar to "TIMESTAMP WITH TIMEZONE".
-         If the target database does not support timezones, this flag is
-         ignored.
-
-
-        """
-        super(TIMESTAMP, self).__init__(timezone=timezone)
-
-    def get_dbapi_type(self, dbapi):
-        return dbapi.TIMESTAMP
-
-
-class DATETIME(DateTime):
-
-    """The SQL DATETIME type."""
-
-    __visit_name__ = 'DATETIME'
-
-
-class DATE(Date):
-
-    """The SQL DATE type."""
-
-    __visit_name__ = 'DATE'
-
-
-class TIME(Time):
-
-    """The SQL TIME type."""
-
-    __visit_name__ = 'TIME'
-
-
-class TEXT(Text):
-
-    """The SQL TEXT type."""
-
-    __visit_name__ = 'TEXT'
-
-
-class CLOB(Text):
-
-    """The CLOB type.
-
-    This type is found in Oracle and Informix.
-    """
-
-    __visit_name__ = 'CLOB'
-
-
-class VARCHAR(String):
-
-    """The SQL VARCHAR type."""
-
-    __visit_name__ = 'VARCHAR'
-
-
-class NVARCHAR(Unicode):
-
-    """The SQL NVARCHAR type."""
-
-    __visit_name__ = 'NVARCHAR'
-
-
-class CHAR(String):
-
-    """The SQL CHAR type."""
-
-    __visit_name__ = 'CHAR'
-
-
-class NCHAR(Unicode):
-
-    """The SQL NCHAR type."""
-
-    __visit_name__ = 'NCHAR'
-
-
-class BLOB(LargeBinary):
-
-    """The SQL BLOB type."""
-
-    __visit_name__ = 'BLOB'
-
-
-class BINARY(_Binary):
-
-    """The SQL BINARY type."""
-
-    __visit_name__ = 'BINARY'
-
-
-class VARBINARY(_Binary):
-
-    """The SQL VARBINARY type."""
-
-    __visit_name__ = 'VARBINARY'
-
-
-class BOOLEAN(Boolean):
-
-    """The SQL BOOLEAN type."""
-
-    __visit_name__ = 'BOOLEAN'
-
-
-class NullType(TypeEngine):
-
-    """An unknown type.
-
-    :class:`.NullType` is used as a default type for those cases where
-    a type cannot be determined, including:
-
-    * During table reflection, when the type of a column is not recognized
-      by the :class:`.Dialect`
-    * When constructing SQL expressions using plain Python objects of
-      unknown types (e.g. ``somecolumn == my_special_object``)
-    * When a new :class:`.Column` is created, and the given type is passed
-      as ``None`` or is not passed at all.
-
-    The :class:`.NullType` can be used within SQL expression invocation
-    without issue, it just has no behavior either at the expression
-    construction level or at the bind-parameter/result processing level.
-    :class:`.NullType` will result in a :exc:`.CompileError` if the compiler
-    is asked to render the type itself, such as if it is used in a
-    :func:`.cast` operation or within a schema creation operation such as that
-    invoked by :meth:`.MetaData.create_all` or the :class:`.CreateTable`
-    construct.
-
-    """
-    __visit_name__ = 'null'
-
-    _isnull = True
-
-    hashable = False
-
-    def literal_processor(self, dialect):
-        def process(value):
-            return "NULL"
-        return process
-
-    class Comparator(TypeEngine.Comparator):
-
-        def _adapt_expression(self, op, other_comparator):
-            if isinstance(other_comparator, NullType.Comparator) or \
-                    not operators.is_commutative(op):
-                return op, self.expr.type
-            else:
-                return other_comparator._adapt_expression(op, self)
-    comparator_factory = Comparator
-
-
-class MatchType(Boolean):
-    """Refers to the return type of the MATCH operator.
-
-    As the :meth:`.ColumnOperators.match` is probably the most open-ended
-    operator in generic SQLAlchemy Core, we can't assume the return type
-    at SQL evaluation time, as MySQL returns a floating point, not a boolean,
-    and other backends might do something different.    So this type
-    acts as a placeholder, currently subclassing :class:`.Boolean`.
-    The type allows dialects to inject result-processing functionality
-    if needed, and on MySQL will return floating-point values.
-
-    .. versionadded:: 1.0.0
-
-    """
-
-NULLTYPE = NullType()
-BOOLEANTYPE = Boolean()
-STRINGTYPE = String()
-INTEGERTYPE = Integer()
-MATCHTYPE = MatchType()
-
-_type_map = {
-    int: Integer(),
-    float: Numeric(),
-    bool: BOOLEANTYPE,
-    decimal.Decimal: Numeric(),
-    dt.date: Date(),
-    dt.datetime: DateTime(),
-    dt.time: Time(),
-    dt.timedelta: Interval(),
-    util.NoneType: NULLTYPE
-}
-
-if util.py3k:
-    _type_map[bytes] = LargeBinary()
-    _type_map[str] = Unicode()
-else:
-    _type_map[unicode] = Unicode()
-    _type_map[str] = String()
-
-_type_map_get = _type_map.get
-
-
-def _resolve_value_to_type(value):
-    _result_type = _type_map_get(type(value), False)
-    if _result_type is False:
-        # use inspect() to detect SQLAlchemy built-in
-        # objects.
-        insp = inspection.inspect(value, False)
-        if (
-                insp is not None and
-                # foil mock.Mock() and other impostors by ensuring
-                # the inspection target itself self-inspects
-                insp.__class__ in inspection._registrars
-        ):
-            raise exc.ArgumentError(
-                "Object %r is not legal as a SQL literal value" % value)
-        return NULLTYPE
-    else:
-        return _result_type
-
-# back-assign to type_api
-from . import type_api
-type_api.BOOLEANTYPE = BOOLEANTYPE
-type_api.STRINGTYPE = STRINGTYPE
-type_api.INTEGERTYPE = INTEGERTYPE
-type_api.NULLTYPE = NULLTYPE
-type_api.MATCHTYPE = MATCHTYPE
-type_api.INDEXABLE = Indexable
-type_api._resolve_value_to_type = _resolve_value_to_type
-TypeEngine.Comparator.BOOLEANTYPE = BOOLEANTYPE
-
- -
-
-
- -
-
- - - - - - - \ No newline at end of file diff --git a/docs/_sources/architecture.rst.txt b/docs/_sources/architecture.rst.txt deleted file mode 100644 index 586dcc9..0000000 --- a/docs/_sources/architecture.rst.txt +++ /dev/null @@ -1,373 +0,0 @@ -Submission & moderation subsystem architecture -********************************************** - -Overview -======== -The submission and moderation subsystem provides: - -- Accession of submission of publication content and metadata via multiple - interfaces, including interfaces provided by trusted third-party platforms; -- Quality assurance tools and workflows to screen submissions (moderation); -- An extensible system for automating parts of the moderation process; -- An event-based log of all submission and moderation activity related - associated with a paper. - -In short, the submission and moderation subsystem is responsible for all -submission-related activities up to (but not including) publication. - -Key requirements -================ - -1. The system must sensibly incorporate input from, and synchronize the - activities of, a variety of human and non-human agents. -2. It must be possible for administrators to audit all changes to submission - state in the system (e.g. by submitters, moderators, automated processes, - etc). -3. Administrators must be able to configure automated rules and processes. -4. The system must support future development of potentially many alternative - interfaces for submission and moderation, including interfaces developed - and operated by trusted third-parties. -5. The system must be able to support a high volume of activity, potentially - two orders of magnitude greater than current levels (~11k submissions per - month in early 2018). -6. The system must make it easier to support future operational and policy - changes around submission content, quality assurance, metadata, and other - areas of concern. 
- - -Solution Strategy -================= - -Submission system - Refers to the collection of services/applications, data stores, and other - components involved in the accession and moderation of new arXiv papers. -Submission - Refers to a collection of descriptive and operational metadata, including - a reference to a content object (e.g. a TeX source package, PDF, etc), that - has been accessioned for possible publication in arXiv. - -Separation of concerns ----------------------- -In the classic arXiv submission system, there is tight coupling between the -submission and a variety of related objects and processes. For example, -processes like TeX compilation, auto-classification, etc are integrated with -web controllers for the submission UI. A major benefit of this approach is that -it keeps operations close together in the submission workflow. A major drawback -is its relative inflexibility: developing any one component of the submission -system risks generating cascading effects to other components, and assumptions -about the implementation details of components are baked into the system. - -One of the major shifts in the NG reimplementation of the submission system is -to pull some of those components apart into self-contained services with -clearly-defined APIs. Our goal is to limit coupling to where it really matters, -and open the door to exchangeability of those components. This should make it -easier to develop individual components without breaking the whole system, and -also make it easier to respond to changing operational policies and procedures. - -The :ref:`utility-services` section describes some of the backend components -that will be "compartmentalized" as stand-alone services in NG. - -Commands (events) as data -------------------------- -The classic arXiv submission system is built around an object-centric data -model. 
Submissions are represented objects whose properties map to rows in a -database table, and workflows are implemented by developing web controllers -that mutate those objects (and the underlying rows). In order to support -administrative requirements of visibility onto activity in the submission -system, a log is updated by those controllers whenever they are executed. -Conditional operations are implemented by adding procedures to those -controllers. This model works well for simple systems in which there is a -single point of entry for submission data: each controller is solely -responsible for a command or set of commands, and so coupling between user -request handling/views and the commands themselves (along with conditional -operations linked to those commands) is not problematic. - -A requirement of arXiv-NG is to provide consistent support for evolving and -potentially many accession pathways into arXiv. A limitation of the classic -architecture is that it requires new submission interfaces to reimplement the -commands (and rules) that it exposes, and to reimplement updates to the -administrative log. In the NG submission system, commands (and log updates) -are independent of the interface controllers -- this allows for a greater -deal of flexibility when implementing or changing interfaces. We can achieve -this either by implementing a command controller as standalone service that -handles commands from other applications, or by implementing a software package -that exposes commands as an internal API (arXiv-lib could be seen as an -attempt in that direction, although it is somewhat defeated by its broad scope -and leakage of business logic). - -Another major requirement of arXiv-NG is to support triggers and automated -processes that can be configured by administrators, in addition to continuing -to support to the administrative log. A step in this direction would be to -include hooks for triggers behind the command API (above), and load parameters -(e.g. 
set in a database or a configuration file by an admin) that control -whether/how the trigger is executed. This has the potential to not scale well, -however, as the kinds of triggers and automation required must be anticipated -ahead of time and semi-hard-coded into the system. An alternative approach (the -one adopted here) is to define a set of primitives that explicitly represent -commands and rules, and build interfaces that allow them to be combined -arbitrarily to build workflows. In this approach, instances of command -execution (events) themselves are treated as data. This meets the requirements -of maintaining a high-fidelity comprehensive activity log. - -A knock-on benefit of treating command execution/events as data is that it -allows for freer evolution of how we represent submission objects. If event -data are treated as the primary source of truth, the representation of the -submission itself can be treated as a secondary and somewhat disposable -projection. In the short term, as we reimplement components of the submission -system, we will need to guarantee that we generate projections in the classic -submission database that satisfy the requirements of legacy components that -have not yet been reimplemented. For example, when implementing a new -submission UI for NG we can collect and store new forms of data about a -submission in the event data (e.g. data used to populate new metadata fields), -but must also ensure that the appropriate tables in the classic database are -kept up-to-date for the sake of the classic moderation system. In the longer -term, projections of event data can be used to support efficient queries, but -do not constrain the evolution of the submission system in other areas. - -Overview --------- -- We will decouple most functional components of the classic submission system - into independent services that are agnostic about submissions. 
This includes - classification, overlap detection, compilation (TeX, PS), and upload/file - management. -- We will implement a :ref:`Python package ` - that is responsible for all commands in the scope of the submission system. - That package should: - - - Define the commands that are available in the submission system, and - provide a Python API for executing those commands. - - Provide an API for defining rules and conditional operations based on those - commands. - - Be responsible for updating the core submission database. It should persist - command execution instances/events in the core database, and also generate - projections of submission state that support query/read operations and - that are compatible with legacy components. - -- A set of :ref:`core submission interface services ` - will provide UIs and APIs to support various submission and moderation - workflows. Those services will utilize the the core command/event package - (above). - - -Context -======= -Authenticated users submit new publications via a user interface. Users can -view the status of their submissions, including feedback and interventions by -moderators and administrators, and amend their submissions as necessary. They -can also view a preview of their submission, and make amendments to the source -files in their submission. Authors can supplement their published and -unpublished submissions with links to external resources and other -supplemental metadata. - -Moderators (authenticated users with a moderator role) screen and curate -submissions through a moderation interface. They can generate comments, flags, -proposals, and other annotations attached to submissions. - -Administrators audit and manage the submission platform, including the -behavior of automated processes and policies, through an administrative -interface. 
They can define rules using command/event types and conditions on -event data, and link those to other commands or processes that will execute -automatically when conditions are met. - -Authors may also submit papers via authorized third-party interfaces, which -integrate with arXiv via HTTP APIs exposed by the arXiv API gateway. API -clients may deposit submissions in bulk (e.g. conference proceedings), or on -an individual basis acting directly on behalf of an arXiv user. Submissions -handled by clients operated by trusted partners may be handled differently than -submissions originating from the arXiv submission interface, as dictated by -arXiv policies. - -Containers (Services & Building Blocks) -======================================= - -.. _figure-submission-containers: - -.. figure:: _static/diagrams/submissionContainers.png - - Containers in the arXiv submission & moderation subsystem. - - -.. _submission-database: - -Submission database -------------------- -The submission database (currently MySQL) is responsible for the persistence of -operational and core descriptive metadata about submissions. Operational -metadata includes information related to arXiv workflows and processes. Core -descriptive metadata are the core publication metadata fields required for -arXiv submissions (e.g. title, authors, abstract). The primary source of truth -for the state of each submission is a set of transformation events. Derivative -representations (e.g. of submission objects) are also stored for querying and -rapid access. - -In early phases of the classic renewal process, this will be the classic MySQL -database running in the CUL-IT datacenter. Upon migration to the cloud, this -may be replaced with something else. - -.. _core-interface-services: - -Core interface services ------------------------ -These services provide the core submission, moderation, and administrative -interfaces for the arXiv submission system. 
Each of these services integrates -with the :ref:`submission-database` to modify submission state, via the -:ref:`submission-core-events-package`. - -Asynchronous operations (e.g. to execute rule-based logic) are performed by a -:ref:`submission-worker` process. Communication between the interface services -and the worker is mediated by a task queue (Redis). Tasks passed on the queue -are implemented in the :ref:`submission-core-events-package` using -`Celery `_. - -These core interface services integrate with other services in the submission -system (e.g. :ref:`file-management-service`, :ref:`compilation-service`) via -their HTTP APIs. - -.. _submission-core-events-package: - -Submission core events package -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -This package provides an event-based Python API for CRUD operations on -submissions and submission-related (meta)data. Services (below) that operate -on submission data do so via this abstraction, which integrates with the -:ref:`submission-database`. - -Rather than perform CRUD operations directly on submission objects, all -operations that modify submission data are performed through the creation of -submission events. This ensures that we have a precise and complete record of -activities concerning submissions, an explicit definition of -operations that can be performed within the arXiv submission system, and a -starting-point for building rule-based workflows to support moderation and -administrative tasks. - -This package also provides integration with a Kinesis notification broker, -which propagates notifications about events in real time to other services -in the arXiv system (e.g. the :ref:`webook-notification-service`). - -To support automated processes, this package also implements a set of -asynchronous tasks using `Celery `_. Provides -integration with a task queue (Redis) for message passing. - -See :ref:`submission-core-events-package-containers`. - -.. 
_submission-worker: - -Submission worker -^^^^^^^^^^^^^^^^^ -The submission worker is a Celery process that executes tasks defined in the -:ref:`submission-core-events-package` and dispatched by core interface -services. This allows us to implement rule processing asynchronously, if needed -for longer-running operations. - -Submission UI service -^^^^^^^^^^^^^^^^^^^^^ -Provides form-based views that allow users to create and update submissions, -and track the state of their submission through the moderation and publication -process. The interface supports metadata entry, source package upload, and -integrates with the :ref:`compilation-service` to assist the submitter in -preparing a publication-ready submission package. - -Uses the :ref:`submission-core-events-package` to update submission state in -the :ref:`submission-database`. - -Submission API service -^^^^^^^^^^^^^^^^^^^^^^ -Provides a RESTful API for trusted clients to facilitate submission to arXiv -via external/third-party user interfaces. Uses the -:ref:`submission-core-events-package` to update submission state in the -:ref:`submission-database`. - -This will replace the existing `arXiv SWORDv1 API -`_. - -Moderation UI service -^^^^^^^^^^^^^^^^^^^^^ -Supports moderator actions on submissions. Comprised of a client-side -application (implemented in `React `_) backed by a -lightweight Flask service. Uses the -:ref:`submission-core-events-package` to update submission state in the -:ref:`submission-database`. - -Administrative UI service -^^^^^^^^^^^^^^^^^^^^^^^^^ -The administrator interfaces provides visibility onto all parts of the -submission service, including the state and event history of all submissions -and submission annotations in the system. Administrators are able to configure -automated policies and processes, intervene on submission content and metadata, -and act on moderator proposals and comments. - - -.. 
_utility-services: - -Utility services ----------------- -The following utility services support the submission and moderation workflow, -providing a menu of functionality used by UI and API services to support -accession and quality assurance. - -.. _file-management-service: - -File management service -^^^^^^^^^^^^^^^^^^^^^^^ -This service is responsible for ensuring the safety and suitability of files -uploaded to the submission system. The file management service accepts -uploads, performs verification and sanitization, and makes the upload available -for use by other services. - -.. _compilation-service: - -Compilation service -^^^^^^^^^^^^^^^^^^^ -https://github.com/cul-it/arxiv-converter - -The build service compiles sanitized upload packages into PDF, PostScript, -and other formats. This service encompasses the arXiv TeX tree. Compilation -logs are also made available, for example to provide submitters feedback about -compilation failures or warnings. - -.. _plain-text-extraction-service: - -Plain text extraction service -^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ -https://github.com/cul-it/arxiv-fulltext - -Extracts plain text content from PDFs, for use by the for overlap detection -and classification services. Makes both raw extracted text and normalized -"PSV" tokenized text available to other services. - -.. _overlap-detection-service: - -Overlap detection service -^^^^^^^^^^^^^^^^^^^^^^^^^ -https://github.com/cul-it/arxiv-docsim - -Operates on extracted plain text content and submission metadata to -detect possibly duplicate submissions. Returns an array of published arXiv -papers with a high degree of overlap. - -.. _classifier-service: - -Classifier service -^^^^^^^^^^^^^^^^^^ -https://github.com/cul-it/arxiv-classifier - -Operates on extracted plain text content and submission metadata to -propose categories for submitted papers. 
- -Notification service -^^^^^^^^^^^^^^^^^^^^ -Responsible for dispatching email notifications to submitters, moderators, -in response to submission system events. Provides UIs for end-user and -administrator configuration. - -Endorsement service -^^^^^^^^^^^^^^^^^^^ -Provides submitter endorsement mechanisms. Submission services can use the -backend API provided by this service to validate author endorsement. Provides -administrative and submitter UIs to manage endorsement status. - -Web-hook notification service ------------------------------ -Provides mechanisms for API clients to register callbacks for submission -events. Event consumer is implemented using the Kinesis Consumer Library and -MultiLangDaemon [refs]. diff --git a/docs/_sources/containers/api/api.controllers.rst.txt b/docs/_sources/containers/api/api.controllers.rst.txt deleted file mode 100644 index cf5e1e0..0000000 --- a/docs/_sources/containers/api/api.controllers.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -api\.controllers package -======================== - -.. automodule:: api.controllers - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -api\.controllers\.submission module ------------------------------------ - -.. automodule:: api.controllers.submission - :members: - :undoc-members: - :show-inheritance: - -api\.controllers\.tests module ------------------------------- - -.. automodule:: api.controllers.tests - :members: - :undoc-members: - :show-inheritance: - -api\.controllers\.util module ------------------------------ - -.. automodule:: api.controllers.util - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/api/api.rst.txt b/docs/_sources/containers/api/api.rst.txt deleted file mode 100644 index 8603fb8..0000000 --- a/docs/_sources/containers/api/api.rst.txt +++ /dev/null @@ -1,51 +0,0 @@ -api package -=========== - -.. automodule:: api - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. 
toctree:: - - api.controllers - -Submodules ----------- - -api\.config module ------------------- - -.. automodule:: api.config - :members: - :undoc-members: - :show-inheritance: - -api\.exceptions module ----------------------- - -.. automodule:: api.exceptions - :members: - :undoc-members: - :show-inheritance: - -api\.factory module -------------------- - -.. automodule:: api.factory - :members: - :undoc-members: - :show-inheritance: - -api\.routes module ------------------- - -.. automodule:: api.routes - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/api/events.domain.rst.txt b/docs/_sources/containers/core/api/events.domain.rst.txt deleted file mode 100644 index 97292dd..0000000 --- a/docs/_sources/containers/core/api/events.domain.rst.txt +++ /dev/null @@ -1,44 +0,0 @@ -events\.domain package -====================== - -.. automodule:: events.domain - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -events\.domain\.agent module ----------------------------- - -.. automodule:: events.domain.agent - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.event module ----------------------------- - -.. automodule:: events.domain.event - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.rule module ---------------------------- - -.. automodule:: events.domain.rule - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.submission module ---------------------------------- - -.. automodule:: events.domain.submission - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/api/events.rst.txt b/docs/_sources/containers/core/api/events.rst.txt deleted file mode 100644 index 38f81b5..0000000 --- a/docs/_sources/containers/core/api/events.rst.txt +++ /dev/null @@ -1,44 +0,0 @@ -events package -============== - -.. 
automodule:: events - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - - events.domain - events.services - -Submodules ----------- - -events\.context module ----------------------- - -.. automodule:: events.context - :members: - :undoc-members: - :show-inheritance: - -events\.exceptions module -------------------------- - -.. automodule:: events.exceptions - :members: - :undoc-members: - :show-inheritance: - -events\.tests module --------------------- - -.. automodule:: events.tests - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/api/events.services.classic.rst.txt b/docs/_sources/containers/core/api/events.services.classic.rst.txt deleted file mode 100644 index 578bb86..0000000 --- a/docs/_sources/containers/core/api/events.services.classic.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -events\.services\.classic package -================================= - -.. automodule:: events.services.classic - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -events\.services\.classic\.exceptions module --------------------------------------------- - -.. automodule:: events.services.classic.exceptions - :members: - :undoc-members: - :show-inheritance: - -events\.services\.classic\.models module ----------------------------------------- - -.. automodule:: events.services.classic.models - :members: - :undoc-members: - :show-inheritance: - -events\.services\.classic\.tests module ---------------------------------------- - -.. automodule:: events.services.classic.tests - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/api/events.services.rst.txt b/docs/_sources/containers/core/api/events.services.rst.txt deleted file mode 100644 index 2fac3b8..0000000 --- a/docs/_sources/containers/core/api/events.services.rst.txt +++ /dev/null @@ -1,27 +0,0 @@ -events\.services package -======================== - -.. 
automodule:: events.services - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - - events.services.classic - -Submodules ----------- - -events\.services\.notification module -------------------------------------- - -.. automodule:: events.services.notification - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/api/modules.rst.txt b/docs/_sources/containers/core/api/modules.rst.txt deleted file mode 100644 index 31ed2fe..0000000 --- a/docs/_sources/containers/core/api/modules.rst.txt +++ /dev/null @@ -1,7 +0,0 @@ -events -====== - -.. toctree:: - :maxdepth: 4 - - events diff --git a/docs/_sources/containers/core/events.domain.rst.txt b/docs/_sources/containers/core/events.domain.rst.txt deleted file mode 100644 index 97292dd..0000000 --- a/docs/_sources/containers/core/events.domain.rst.txt +++ /dev/null @@ -1,44 +0,0 @@ -events\.domain package -====================== - -.. automodule:: events.domain - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -events\.domain\.agent module ----------------------------- - -.. automodule:: events.domain.agent - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.event module ----------------------------- - -.. automodule:: events.domain.event - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.rule module ---------------------------- - -.. automodule:: events.domain.rule - :members: - :undoc-members: - :show-inheritance: - -events\.domain\.submission module ---------------------------------- - -.. automodule:: events.domain.submission - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/events.rst.txt b/docs/_sources/containers/core/events.rst.txt deleted file mode 100644 index 36bf792..0000000 --- a/docs/_sources/containers/core/events.rst.txt +++ /dev/null @@ -1,36 +0,0 @@ -events package -============== - -.. 
automodule:: events - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - - events.domain - events.services - -Submodules ----------- - -events\.exceptions module -------------------------- - -.. automodule:: events.exceptions - :members: - :undoc-members: - :show-inheritance: - -events\.tests module --------------------- - -.. automodule:: events.tests - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/events.services.classic.rst.txt b/docs/_sources/containers/core/events.services.classic.rst.txt deleted file mode 100644 index 37a2b06..0000000 --- a/docs/_sources/containers/core/events.services.classic.rst.txt +++ /dev/null @@ -1,44 +0,0 @@ -events\.services\.classic package -================================= - -.. automodule:: events.services.classic - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -events\.services\.classic\.exceptions module --------------------------------------------- - -.. automodule:: events.services.classic.exceptions - :members: - :undoc-members: - :show-inheritance: - -events\.services\.classic\.models module ----------------------------------------- - -.. automodule:: events.services.classic.models - :members: - :undoc-members: - :show-inheritance: - -events\.services\.classic\.tests module ---------------------------------------- - -.. automodule:: events.services.classic.tests - :members: - :undoc-members: - :show-inheritance: - -events\.services\.classic\.util module --------------------------------------- - -.. 
automodule:: events.services.classic.util - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/events.services.rst.txt b/docs/_sources/containers/core/events.services.rst.txt deleted file mode 100644 index 2fac3b8..0000000 --- a/docs/_sources/containers/core/events.services.rst.txt +++ /dev/null @@ -1,27 +0,0 @@ -events\.services package -======================== - -.. automodule:: events.services - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - - events.services.classic - -Submodules ----------- - -events\.services\.notification module -------------------------------------- - -.. automodule:: events.services.notification - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/core/index.rst.txt b/docs/_sources/containers/core/index.rst.txt deleted file mode 100644 index e8eecd5..0000000 --- a/docs/_sources/containers/core/index.rst.txt +++ /dev/null @@ -1,11 +0,0 @@ -.. _submission-core-events-package-containers: - -Submission core events package -############################## - -foo - -.. toctree:: - :maxdepth: 2 - - api/modules.rst diff --git a/docs/_sources/containers/core/modules.rst.txt b/docs/_sources/containers/core/modules.rst.txt deleted file mode 100644 index 31ed2fe..0000000 --- a/docs/_sources/containers/core/modules.rst.txt +++ /dev/null @@ -1,7 +0,0 @@ -events -====== - -.. toctree:: - :maxdepth: 4 - - events diff --git a/docs/_sources/containers/metadata/metadata.controllers.rst.txt b/docs/_sources/containers/metadata/metadata.controllers.rst.txt deleted file mode 100644 index cad54cc..0000000 --- a/docs/_sources/containers/metadata/metadata.controllers.rst.txt +++ /dev/null @@ -1,27 +0,0 @@ -metadata\.controllers package -============================= - -.. automodule:: metadata.controllers - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. 
toctree:: - - metadata.controllers.submission - -Submodules ----------- - -metadata\.controllers\.util module ----------------------------------- - -.. automodule:: metadata.controllers.util - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/metadata/metadata.controllers.submission.rst.txt b/docs/_sources/containers/metadata/metadata.controllers.submission.rst.txt deleted file mode 100644 index f551100..0000000 --- a/docs/_sources/containers/metadata/metadata.controllers.submission.rst.txt +++ /dev/null @@ -1,28 +0,0 @@ -metadata\.controllers\.submission package -========================================= - -.. automodule:: metadata.controllers.submission - :members: - :undoc-members: - :show-inheritance: - -Submodules ----------- - -metadata\.controllers\.submission\.handlers module --------------------------------------------------- - -.. automodule:: metadata.controllers.submission.handlers - :members: - :undoc-members: - :show-inheritance: - -metadata\.controllers\.submission\.tests module ------------------------------------------------ - -.. automodule:: metadata.controllers.submission.tests - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/metadata/metadata.rst.txt b/docs/_sources/containers/metadata/metadata.rst.txt deleted file mode 100644 index 425ba0c..0000000 --- a/docs/_sources/containers/metadata/metadata.rst.txt +++ /dev/null @@ -1,43 +0,0 @@ -metadata package -================ - -.. automodule:: metadata - :members: - :undoc-members: - :show-inheritance: - -Subpackages ------------ - -.. toctree:: - - metadata.controllers - -Submodules ----------- - -metadata\.config module ------------------------ - -.. automodule:: metadata.config - :members: - :undoc-members: - :show-inheritance: - -metadata\.factory module ------------------------- - -.. 
automodule:: metadata.factory - :members: - :undoc-members: - :show-inheritance: - -metadata\.routes module ------------------------ - -.. automodule:: metadata.routes - :members: - :undoc-members: - :show-inheritance: - - diff --git a/docs/_sources/containers/metadata/modules.rst.txt b/docs/_sources/containers/metadata/modules.rst.txt deleted file mode 100644 index 53e760c..0000000 --- a/docs/_sources/containers/metadata/modules.rst.txt +++ /dev/null @@ -1,7 +0,0 @@ -metadata -======== - -.. toctree:: - :maxdepth: 4 - - metadata diff --git a/docs/_sources/index.rst.txt b/docs/_sources/index.rst.txt deleted file mode 100644 index 9f17eca..0000000 --- a/docs/_sources/index.rst.txt +++ /dev/null @@ -1,14 +0,0 @@ -Submission & moderation -*********************** - -This document describes the architecture of the submission & moderation -subsystem. - -.. toctree:: - :maxdepth: 2 - - architecture - publication_process - submission_api_context - containers/core/events - containers/metadata/metadata diff --git a/docs/_sources/submission_api_context.rst.txt b/docs/_sources/submission_api_context.rst.txt deleted file mode 100644 index 362cca2..0000000 --- a/docs/_sources/submission_api_context.rst.txt +++ /dev/null @@ -1,224 +0,0 @@ -Submission API: Context -*********************** - -.. note:: - - This section is badly out of date. Disregard for now. - - -The arXiv submission API provides programmatic access to the arXiv submission -system for API consumers. - -Submission Workflows -==================== - -Proxy Submission ----------------- -Proxy submission is when an API client submits on behalf of an arXiv user who -has explicitly delegated authorization to the client. - -A client that wishes to perform proxy submission must have ``auth:3legged`` and -``submit:proxy`` scope, and must implement a secure three-legged OAuth -authorization process. 
- -In proxy submission, the arXiv user who has authorized the client to submit -on their behalf will be the primary owner of the submission. This allows the -user to intervene directly on the submission process later on, and provides -some flexibility to clients who may wish only to partially implement the -submission process. - -Note that in the classic arXiv system, "proxy submission" referred to bulk -deposit via the SWORDv1 API. - -Bulk Submission ---------------- -Bulk submission is when an API client submits directly to arXiv without the -involvement of an arXiv user. Bulk submission may be appropriate for -conference proceedings or other large-volume deposits for which it is -impossible or impracticable to involve individual users. - -A client that wishes to perform bulk submission must have a ``submit:bulk`` -scope. - -In bulk submission, the client is the primary owner of the submission. To -give ownership of the submission to an arXiv user, the client must take -explicit action to transfer ownership. - -Access & Authorization -====================== - -User of the submission API requires client credentials, which can be obtained -via the arXiv API Client Registry. See ... - -Relevant Scopes ---------------- -Ensure that your client credentials have been granted the necessary scopes for -your use-case. To request that missing scopes be added to your credentials, -see ... - -- ``auth:3legged``: Required for proxy submission. -- ``submit:proxy``: Required for proxy submission. -- ``submit:bulk``: Required for bulk submission. - -Two-legged Authorization ------------------------- -Two-legged authorization grants access to resources for which end-user -involvement is not required. This is suitable for bulk submission, but not -proxy submission. This authorization mechanism involves exchanging your -client id and client secret for an access token. - -.. 
code-block:: bash - - $ curl -i -L \ - > -d "client_id=[ your client id ]" \ - > -d "client_secret=[ your client secret ]" \ - > -d "grant_type=client_credentials" \ - > "https://api.arxiv.org/auth/token" - {"access_token":"[ your access token ]","token_type":"bearer", - "refresh_token":"[ your refresh token ]","expires_in":3600} - - -Use your access token in subsequent requests by passing it in the Authorization -header. For example: - -.. code-block:: bash - - $ curl -i -L \ - > -H "Authorization: [ your access token ]" \ - > "https://api.arxiv.org/submit/" - - -When your access token expires, you can request a new one with: - -.. code-block:: bash - -$ curl -i -L \ -> -d "refresh_token=[ your refresh token ]" \ -> -d "grant_type=refresh_token" \ -> "https://api.arxiv.org/auth/token" -{"access_token":"[ your new access token ]","token_type":"bearer", -"refresh_token":"[ your new refresh token ]","expires_in":3600} - - -Three-legged Authorization --------------------------- -Three-legged authorization allows arXiv users to delegate API clients to take -actions on their behalf. This is required for proxy submission. Note that your -client credentials must have an associated ``auth:3legged`` scope, and you -must have entered a valid callback URI for your application. - -- Client initiates authorization by directing the user to the arXiv API - authorization endpoint: ``https://api.arxiv.org/auth/authorize?client_id=[ your client ID ]`` -- User is asked to log in and authorize your client. If the user does not - already have an arXiv account, they are given the option to create one at - this time, and then proceed with authorization. -- If the user authorizes your client, they will be redirected to your - registered callback URI. A short-lived authorization code will be included - as a GET parameter, e.g. 
``https://yourapp.foo/callback?code=[ auth code ]`` -- Client may exchange the short-lived authorization code for a longer-lived - authorization token: - - $ curl -i -L \ - > -d "client_id=[ your client id ]" \ - > -d "client_secret=[ your client secret ]" \ - > -d "code=[ your auth code ]" \ - > -d "grant_type=authorization_code" \ - > "https://api.arxiv.org/auth/token" - {"access_token":"[ your access token ]","token_type":"bearer", - "refresh_token":"[ your refresh token ]","expires_in":3600} - -The authorization code may only be used once. Multiple attempts to exchange the -authorization code for an authorization token will invalidate both the -authorization code and the authorization token that was generated on the first -request. - -Use your authorization token in subsequent requests by passing it in the -Authorization header. For example: - -.. code-block:: bash - - $ curl -i -L \ - > -H "Authorization: [ your access token ]" \ - > "https://api.arxiv.org/submit/" - -Endorsement ------------ -Most subject areas in arXiv require that the submitter be endorsed by another -member of the scientific community. For more information about what endorsement -is and how it works on a per-user level, see... - -In addition to the required authorization scopes mentioned above, the API -client must usually also be granted an endorsement scope for the subject areas -to which it intends to submit. Endorsement scopes may be requested through the -arXiv API Client Registry; see ... - -Exception: in the case of proxy submission, the user on whose behalf the -client submits to arXiv may already be endorsed for a particular subject area. -If so, the client need not be endorsed for that subject area for the submission -to proceed. - -Submission Overview -=================== -The submission process is essentially the same for proxy and bulk submissions, -as ownership is inferred from the authorization token provided in each -request. 
- -Submission is initiated upon creation of a new submission resource, by -POSTing to the ``/submission/`` endpoint. The submission resource need not be -complete at this time. See :ref:`api-create`. - -The submission source package may then be added by PUTing the package (see -:ref:`accepted-package-formats`) to the source endpoint: -``/submission/{id}/source/``. The response will include a redirect to a status -endpoint; the source package will be sanitized and unpacked, which may take a -little while, and the status endpoint can be monitored for progress. -Alternatively, a webhook may be configured to receive notifications about -source processing events. See :ref:`api-source`. - -When a source package is uploaded, by default the arXiv submission system will -attempt to compile the source to PDF. Automatic compilation may be disabled, -e.g. to allow for a multi-step upload process. To trigger compilation directly, -a POST request may be made to the compilation endpoint: -``/submission/{id}/source/compile/``. The response will include a reference to -a status endpoint that can be monitored for progress; alternatively, a webhook -may be configured to receive notifications about compilation. - -If compilation is successful, the resulting PDF may be retrieved from: -``/submission/{id}/build/pdf/``. Compilation log output may be retrieved from -``/submission/{id}/build/log/``. - -Note that the source must compile successfully for submission to proceed, and -the submission resource must be updated to confirm that the client/user is -satisfied with the compiled paper. It is up to the client whether/how such -confirmation should occur. - -Updates to the submission may be made via subsequent POST requests to the -submission endpoint (``/submission/{id}/``). This allows the client to -spread the submission process over several steps, if desired. - -External links may be attached to the submission by POSTing to the links -endpoint, ``/submission/{id}/links/``. 
This may be used to supplement the -core metadata with links to external resources, such as code, data, multimedia -content, or an URI for an alternate version of the paper (e.g. in a -peer-reviewed journal). See :ref:`api-external-links`. - -Once all required procedural and descriptive metadata have been added to the -submission, it may be submitted by POSTing to the submit endpoint: -``/submission/{id}/submit/``. See :ref:`api-submit`. - -A client may register to receive updates about one or all submissions for which -it is responsible. To register a webhook for a specific submission, a POST -request may be made to ``/submission/{id}/webhooks/``. To register a webhook -for all submissions for which the client is responsible, a POST request may be -made to ``/webhooks/``. See :ref:`api-webhooks`. - -Once the submission has been published, the submission will be updated with -its arXiv identifier and version number. If a webhook is registered, a -publication notification will also be issued. - -The client may transfer ownership of the submission to another agent (user or -another client) via the ``/submission/{id}/transfer/`` endpoint. Note that this -is non-reversible without intervention from the recipient. An alternative is -to delegate editing privileges to another agent, via the -``/submission/{id}/delegate/`` endpoint. See :ref:`api-transfer` and -:ref:`api-delegation`. diff --git a/docs/_sources/submission_containers.rst.txt b/docs/_sources/submission_containers.rst.txt deleted file mode 100644 index c217105..0000000 --- a/docs/_sources/submission_containers.rst.txt +++ /dev/null @@ -1,40 +0,0 @@ -Containers -********** - -Submission API -============== - - -Submission Events Controller Service -==================================== - -Submission Events ------------------ - -The arXiv-NG submission system treats changes to or actions concerning a -submission as the primary unit of data. 
Metadata updates, moderation actions, -and procedures applied automatically by the submission system all generate -submission events. - -Events are stored in order, describe the transformation that they represent, -and encode the provenance of the event (who generated the event, and when). We -can play these events forward to calculate the current state of a submission, -or a past state. - -A complete list of submission event types can be found in :ref:`event-types`. - -When a submission event is created, several things occur: - -1. All of the recorded events for the submission are loaded from the database, - and the new event is inserted into that event stack. -2. The event is validated, based on the event's own data and the state of the - submission. -3. The event may trigger system or moderation rules, which generate additional - events that are inserted into the event stack. -4. The final state of the submission is calculated from the event stack, and - the new events and final state are persisted in the database. -5. New events are propagated to other arXiv services via a - :ref:`notification-broker`. -6. The :ref:`webhook-service` listens for events from the notification broker, - and propagates them to API clients who have registered a corresponding - webhook. 
diff --git a/docs/_static/ajax-loader.gif b/docs/_static/ajax-loader.gif deleted file mode 100644 index 61faf8c..0000000 Binary files a/docs/_static/ajax-loader.gif and /dev/null differ diff --git a/docs/_static/alabaster.css b/docs/_static/alabaster.css deleted file mode 100644 index be65b13..0000000 --- a/docs/_static/alabaster.css +++ /dev/null @@ -1,693 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - -@import url("basic.css"); - -/* -- page layout ----------------------------------------------------------- */ - -body { - font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; - font-size: 17px; - background-color: #fff; - color: #000; - margin: 0; - padding: 0; -} - - -div.document { - width: 940px; - margin: 30px auto 0 auto; -} - -div.documentwrapper { - float: left; - width: 100%; -} - -div.bodywrapper { - margin: 0 0 0 220px; -} - -div.sphinxsidebar { - width: 220px; - font-size: 14px; - line-height: 1.5; -} - -hr { - border: 1px solid #B1B4B6; -} - -div.body { - background-color: #fff; - color: #3E4349; - padding: 0 30px 0 30px; -} - -div.body > .section { - text-align: left; -} - -div.footer { - width: 940px; - margin: 20px auto 30px auto; - font-size: 14px; - color: #888; - text-align: right; -} - -div.footer a { - color: #888; -} - -p.caption { - font-family: inherit; - font-size: inherit; -} - - -div.relations { - display: none; -} - - -div.sphinxsidebar a { - color: #444; - text-decoration: none; - border-bottom: 1px dotted #999; -} - -div.sphinxsidebar a:hover { - border-bottom: 1px solid #999; -} - -div.sphinxsidebarwrapper { - padding: 18px 10px; -} - -div.sphinxsidebarwrapper p.logo { - padding: 0; - margin: -10px 0 0 0px; - text-align: center; -} - -div.sphinxsidebarwrapper h1.logo { - margin-top: -10px; - text-align: center; - margin-bottom: 5px; - text-align: left; -} - -div.sphinxsidebarwrapper h1.logo-name { - margin-top: 0px; -} - 
-div.sphinxsidebarwrapper p.blurb { - margin-top: 0; - font-style: normal; -} - -div.sphinxsidebar h3, -div.sphinxsidebar h4 { - font-family: 'Garamond', 'Georgia', serif; - color: #444; - font-size: 24px; - font-weight: normal; - margin: 0 0 5px 0; - padding: 0; -} - -div.sphinxsidebar h4 { - font-size: 20px; -} - -div.sphinxsidebar h3 a { - color: #444; -} - -div.sphinxsidebar p.logo a, -div.sphinxsidebar h3 a, -div.sphinxsidebar p.logo a:hover, -div.sphinxsidebar h3 a:hover { - border: none; -} - -div.sphinxsidebar p { - color: #555; - margin: 10px 0; -} - -div.sphinxsidebar ul { - margin: 10px 0; - padding: 0; - color: #000; -} - -div.sphinxsidebar ul li.toctree-l1 > a { - font-size: 120%; -} - -div.sphinxsidebar ul li.toctree-l2 > a { - font-size: 110%; -} - -div.sphinxsidebar input { - border: 1px solid #CCC; - font-family: 'goudy old style', 'minion pro', 'bell mt', Georgia, 'Hiragino Mincho Pro', serif; - font-size: 1em; -} - -div.sphinxsidebar hr { - border: none; - height: 1px; - color: #AAA; - background: #AAA; - - text-align: left; - margin-left: 0; - width: 50%; -} - -/* -- body styles ----------------------------------------------------------- */ - -a { - color: #004B6B; - text-decoration: underline; -} - -a:hover { - color: #6D4100; - text-decoration: underline; -} - -div.body h1, -div.body h2, -div.body h3, -div.body h4, -div.body h5, -div.body h6 { - font-family: 'Garamond', 'Georgia', serif; - font-weight: normal; - margin: 30px 0px 10px 0px; - padding: 0; -} - -div.body h1 { margin-top: 0; padding-top: 0; font-size: 240%; } -div.body h2 { font-size: 180%; } -div.body h3 { font-size: 150%; } -div.body h4 { font-size: 130%; } -div.body h5 { font-size: 100%; } -div.body h6 { font-size: 100%; } - -a.headerlink { - color: #DDD; - padding: 0 4px; - text-decoration: none; -} - -a.headerlink:hover { - color: #444; - background: #EAEAEA; -} - -div.body p, div.body dd, div.body li { - line-height: 1.4em; -} - -div.admonition { - margin: 20px 0px; - 
padding: 10px 30px; - background-color: #EEE; - border: 1px solid #CCC; -} - -div.admonition tt.xref, div.admonition code.xref, div.admonition a tt { - background-color: #FBFBFB; - border-bottom: 1px solid #fafafa; -} - -div.admonition p.admonition-title { - font-family: 'Garamond', 'Georgia', serif; - font-weight: normal; - font-size: 24px; - margin: 0 0 10px 0; - padding: 0; - line-height: 1; -} - -div.admonition p.last { - margin-bottom: 0; -} - -div.highlight { - background-color: #fff; -} - -dt:target, .highlight { - background: #FAF3E8; -} - -div.warning { - background-color: #FCC; - border: 1px solid #FAA; -} - -div.danger { - background-color: #FCC; - border: 1px solid #FAA; - -moz-box-shadow: 2px 2px 4px #D52C2C; - -webkit-box-shadow: 2px 2px 4px #D52C2C; - box-shadow: 2px 2px 4px #D52C2C; -} - -div.error { - background-color: #FCC; - border: 1px solid #FAA; - -moz-box-shadow: 2px 2px 4px #D52C2C; - -webkit-box-shadow: 2px 2px 4px #D52C2C; - box-shadow: 2px 2px 4px #D52C2C; -} - -div.caution { - background-color: #FCC; - border: 1px solid #FAA; -} - -div.attention { - background-color: #FCC; - border: 1px solid #FAA; -} - -div.important { - background-color: #EEE; - border: 1px solid #CCC; -} - -div.note { - background-color: #EEE; - border: 1px solid #CCC; -} - -div.tip { - background-color: #EEE; - border: 1px solid #CCC; -} - -div.hint { - background-color: #EEE; - border: 1px solid #CCC; -} - -div.seealso { - background-color: #EEE; - border: 1px solid #CCC; -} - -div.topic { - background-color: #EEE; -} - -p.admonition-title { - display: inline; -} - -p.admonition-title:after { - content: ":"; -} - -pre, tt, code { - font-family: 'Consolas', 'Menlo', 'Deja Vu Sans Mono', 'Bitstream Vera Sans Mono', monospace; - font-size: 0.9em; -} - -.hll { - background-color: #FFC; - margin: 0 -12px; - padding: 0 12px; - display: block; -} - -img.screenshot { -} - -tt.descname, tt.descclassname, code.descname, code.descclassname { - font-size: 0.95em; -} - 
-tt.descname, code.descname { - padding-right: 0.08em; -} - -img.screenshot { - -moz-box-shadow: 2px 2px 4px #EEE; - -webkit-box-shadow: 2px 2px 4px #EEE; - box-shadow: 2px 2px 4px #EEE; -} - -table.docutils { - border: 1px solid #888; - -moz-box-shadow: 2px 2px 4px #EEE; - -webkit-box-shadow: 2px 2px 4px #EEE; - box-shadow: 2px 2px 4px #EEE; -} - -table.docutils td, table.docutils th { - border: 1px solid #888; - padding: 0.25em 0.7em; -} - -table.field-list, table.footnote { - border: none; - -moz-box-shadow: none; - -webkit-box-shadow: none; - box-shadow: none; -} - -table.footnote { - margin: 15px 0; - width: 100%; - border: 1px solid #EEE; - background: #FDFDFD; - font-size: 0.9em; -} - -table.footnote + table.footnote { - margin-top: -15px; - border-top: none; -} - -table.field-list th { - padding: 0 0.8em 0 0; -} - -table.field-list td { - padding: 0; -} - -table.field-list p { - margin-bottom: 0.8em; -} - -/* Cloned from - * https://github.com/sphinx-doc/sphinx/commit/ef60dbfce09286b20b7385333d63a60321784e68 - */ -.field-name { - -moz-hyphens: manual; - -ms-hyphens: manual; - -webkit-hyphens: manual; - hyphens: manual; -} - -table.footnote td.label { - width: .1px; - padding: 0.3em 0 0.3em 0.5em; -} - -table.footnote td { - padding: 0.3em 0.5em; -} - -dl { - margin: 0; - padding: 0; -} - -dl dd { - margin-left: 30px; -} - -blockquote { - margin: 0 0 0 30px; - padding: 0; -} - -ul, ol { - /* Matches the 30px from the narrow-screen "li > ul" selector below */ - margin: 10px 0 10px 30px; - padding: 0; -} - -pre { - background: #EEE; - padding: 7px 30px; - margin: 15px 0px; - line-height: 1.3em; -} - -div.viewcode-block:target { - background: #ffd; -} - -dl pre, blockquote pre, li pre { - margin-left: 0; - padding-left: 30px; -} - -tt, code { - background-color: #ecf0f3; - color: #222; - /* padding: 1px 2px; */ -} - -tt.xref, code.xref, a tt { - background-color: #FBFBFB; - border-bottom: 1px solid #fff; -} - -a.reference { - text-decoration: none; - 
border-bottom: 1px dotted #004B6B; -} - -/* Don't put an underline on images */ -a.image-reference, a.image-reference:hover { - border-bottom: none; -} - -a.reference:hover { - border-bottom: 1px solid #6D4100; -} - -a.footnote-reference { - text-decoration: none; - font-size: 0.7em; - vertical-align: top; - border-bottom: 1px dotted #004B6B; -} - -a.footnote-reference:hover { - border-bottom: 1px solid #6D4100; -} - -a:hover tt, a:hover code { - background: #EEE; -} - - -@media screen and (max-width: 870px) { - - div.sphinxsidebar { - display: none; - } - - div.document { - width: 100%; - - } - - div.documentwrapper { - margin-left: 0; - margin-top: 0; - margin-right: 0; - margin-bottom: 0; - } - - div.bodywrapper { - margin-top: 0; - margin-right: 0; - margin-bottom: 0; - margin-left: 0; - } - - ul { - margin-left: 0; - } - - li > ul { - /* Matches the 30px from the "ul, ol" selector above */ - margin-left: 30px; - } - - .document { - width: auto; - } - - .footer { - width: auto; - } - - .bodywrapper { - margin: 0; - } - - .footer { - width: auto; - } - - .github { - display: none; - } - - - -} - - - -@media screen and (max-width: 875px) { - - body { - margin: 0; - padding: 20px 30px; - } - - div.documentwrapper { - float: none; - background: #fff; - } - - div.sphinxsidebar { - display: block; - float: none; - width: 102.5%; - margin: 50px -30px -20px -30px; - padding: 10px 20px; - background: #333; - color: #FFF; - } - - div.sphinxsidebar h3, div.sphinxsidebar h4, div.sphinxsidebar p, - div.sphinxsidebar h3 a { - color: #fff; - } - - div.sphinxsidebar a { - color: #AAA; - } - - div.sphinxsidebar p.logo { - display: none; - } - - div.document { - width: 100%; - margin: 0; - } - - div.footer { - display: none; - } - - div.bodywrapper { - margin: 0; - } - - div.body { - min-height: 0; - padding: 0; - } - - .rtd_doc_footer { - display: none; - } - - .document { - width: auto; - } - - .footer { - width: auto; - } - - .footer { - width: auto; - } - - .github { - 
display: none; - } -} - - -/* misc. */ - -.revsys-inline { - display: none!important; -} - -/* Make nested-list/multi-paragraph items look better in Releases changelog - * pages. Without this, docutils' magical list fuckery causes inconsistent - * formatting between different release sub-lists. - */ -div#changelog > div.section > ul > li > p:only-child { - margin-bottom: 0; -} - -/* Hide fugly table cell borders in ..bibliography:: directive output */ -table.docutils.citation, table.docutils.citation td, table.docutils.citation th { - border: none; - /* Below needed in some edge cases; if not applied, bottom shadows appear */ - -moz-box-shadow: none; - -webkit-box-shadow: none; - box-shadow: none; -} \ No newline at end of file diff --git a/docs/_static/basic.css b/docs/_static/basic.css deleted file mode 100644 index 6df76b0..0000000 --- a/docs/_static/basic.css +++ /dev/null @@ -1,639 +0,0 @@ -/* - * basic.css - * ~~~~~~~~~ - * - * Sphinx stylesheet -- basic theme. - * - * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. 
- * - */ - -/* -- main layout ----------------------------------------------------------- */ - -div.clearer { - clear: both; -} - -/* -- relbar ---------------------------------------------------------------- */ - -div.related { - width: 100%; - font-size: 90%; -} - -div.related h3 { - display: none; -} - -div.related ul { - margin: 0; - padding: 0 0 0 10px; - list-style: none; -} - -div.related li { - display: inline; -} - -div.related li.right { - float: right; - margin-right: 5px; -} - -/* -- sidebar --------------------------------------------------------------- */ - -div.sphinxsidebarwrapper { - padding: 10px 5px 0 10px; -} - -div.sphinxsidebar { - float: left; - width: 230px; - margin-left: -100%; - font-size: 90%; - word-wrap: break-word; - overflow-wrap : break-word; -} - -div.sphinxsidebar ul { - list-style: none; -} - -div.sphinxsidebar ul ul, -div.sphinxsidebar ul.want-points { - margin-left: 20px; - list-style: square; -} - -div.sphinxsidebar ul ul { - margin-top: 0; - margin-bottom: 0; -} - -div.sphinxsidebar form { - margin-top: 10px; -} - -div.sphinxsidebar input { - border: 1px solid #98dbcc; - font-family: sans-serif; - font-size: 1em; -} - -div.sphinxsidebar #searchbox input[type="text"] { - width: 170px; -} - -img { - border: 0; - max-width: 100%; -} - -/* -- search page ----------------------------------------------------------- */ - -ul.search { - margin: 10px 0 0 20px; - padding: 0; -} - -ul.search li { - padding: 5px 0 5px 20px; - background-image: url(file.png); - background-repeat: no-repeat; - background-position: 0 7px; -} - -ul.search li a { - font-weight: bold; -} - -ul.search li div.context { - color: #888; - margin: 2px 0 0 30px; - text-align: left; -} - -ul.keywordmatches li.goodmatch a { - font-weight: bold; -} - -/* -- index page ------------------------------------------------------------ */ - -table.contentstable { - width: 90%; - margin-left: auto; - margin-right: auto; -} - -table.contentstable p.biglink { - line-height: 150%; 
-} - -a.biglink { - font-size: 1.3em; -} - -span.linkdescr { - font-style: italic; - padding-top: 5px; - font-size: 90%; -} - -/* -- general index --------------------------------------------------------- */ - -table.indextable { - width: 100%; -} - -table.indextable td { - text-align: left; - vertical-align: top; -} - -table.indextable ul { - margin-top: 0; - margin-bottom: 0; - list-style-type: none; -} - -table.indextable > tbody > tr > td > ul { - padding-left: 0em; -} - -table.indextable tr.pcap { - height: 10px; -} - -table.indextable tr.cap { - margin-top: 10px; - background-color: #f2f2f2; -} - -img.toggler { - margin-right: 3px; - margin-top: 3px; - cursor: pointer; -} - -div.modindex-jumpbox { - border-top: 1px solid #ddd; - border-bottom: 1px solid #ddd; - margin: 1em 0 1em 0; - padding: 0.4em; -} - -div.genindex-jumpbox { - border-top: 1px solid #ddd; - border-bottom: 1px solid #ddd; - margin: 1em 0 1em 0; - padding: 0.4em; -} - -/* -- domain module index --------------------------------------------------- */ - -table.modindextable td { - padding: 2px; - border-collapse: collapse; -} - -/* -- general body styles --------------------------------------------------- */ - -div.body p, div.body dd, div.body li, div.body blockquote { - -moz-hyphens: auto; - -ms-hyphens: auto; - -webkit-hyphens: auto; - hyphens: auto; -} - -a.headerlink { - visibility: hidden; -} - -h1:hover > a.headerlink, -h2:hover > a.headerlink, -h3:hover > a.headerlink, -h4:hover > a.headerlink, -h5:hover > a.headerlink, -h6:hover > a.headerlink, -dt:hover > a.headerlink, -caption:hover > a.headerlink, -p.caption:hover > a.headerlink, -div.code-block-caption:hover > a.headerlink { - visibility: visible; -} - -div.body p.caption { - text-align: inherit; -} - -div.body td { - text-align: left; -} - -.first { - margin-top: 0 !important; -} - -p.rubric { - margin-top: 30px; - font-weight: bold; -} - -img.align-left, .figure.align-left, object.align-left { - clear: left; - float: left; - 
margin-right: 1em; -} - -img.align-right, .figure.align-right, object.align-right { - clear: right; - float: right; - margin-left: 1em; -} - -img.align-center, .figure.align-center, object.align-center { - display: block; - margin-left: auto; - margin-right: auto; -} - -.align-left { - text-align: left; -} - -.align-center { - text-align: center; -} - -.align-right { - text-align: right; -} - -/* -- sidebars -------------------------------------------------------------- */ - -div.sidebar { - margin: 0 0 0.5em 1em; - border: 1px solid #ddb; - padding: 7px 7px 0 7px; - background-color: #ffe; - width: 40%; - float: right; -} - -p.sidebar-title { - font-weight: bold; -} - -/* -- topics ---------------------------------------------------------------- */ - -div.topic { - border: 1px solid #ccc; - padding: 7px 7px 0 7px; - margin: 10px 0 10px 0; -} - -p.topic-title { - font-size: 1.1em; - font-weight: bold; - margin-top: 10px; -} - -/* -- admonitions ----------------------------------------------------------- */ - -div.admonition { - margin-top: 10px; - margin-bottom: 10px; - padding: 7px; -} - -div.admonition dt { - font-weight: bold; -} - -div.admonition dl { - margin-bottom: 0; -} - -p.admonition-title { - margin: 0px 10px 5px 0px; - font-weight: bold; -} - -div.body p.centered { - text-align: center; - margin-top: 25px; -} - -/* -- tables ---------------------------------------------------------------- */ - -table.docutils { - border: 0; - border-collapse: collapse; -} - -table caption span.caption-number { - font-style: italic; -} - -table caption span.caption-text { -} - -table.docutils td, table.docutils th { - padding: 1px 8px 1px 5px; - border-top: 0; - border-left: 0; - border-right: 0; - border-bottom: 1px solid #aaa; -} - -table.footnote td, table.footnote th { - border: 0 !important; -} - -th { - text-align: left; - padding-right: 5px; -} - -table.citation { - border-left: solid 1px gray; - margin-left: 1px; -} - -table.citation td { - border-bottom: none; 
-} - -/* -- figures --------------------------------------------------------------- */ - -div.figure { - margin: 0.5em; - padding: 0.5em; -} - -div.figure p.caption { - padding: 0.3em; -} - -div.figure p.caption span.caption-number { - font-style: italic; -} - -div.figure p.caption span.caption-text { -} - -/* -- field list styles ----------------------------------------------------- */ - -table.field-list td, table.field-list th { - border: 0 !important; -} - -.field-list ul { - margin: 0; - padding-left: 1em; -} - -.field-list p { - margin: 0; -} - -.field-name { - -moz-hyphens: manual; - -ms-hyphens: manual; - -webkit-hyphens: manual; - hyphens: manual; -} - -/* -- other body styles ----------------------------------------------------- */ - -ol.arabic { - list-style: decimal; -} - -ol.loweralpha { - list-style: lower-alpha; -} - -ol.upperalpha { - list-style: upper-alpha; -} - -ol.lowerroman { - list-style: lower-roman; -} - -ol.upperroman { - list-style: upper-roman; -} - -dl { - margin-bottom: 15px; -} - -dd p { - margin-top: 0px; -} - -dd ul, dd table { - margin-bottom: 10px; -} - -dd { - margin-top: 3px; - margin-bottom: 10px; - margin-left: 30px; -} - -dt:target, .highlighted { - background-color: #fbe54e; -} - -dl.glossary dt { - font-weight: bold; - font-size: 1.1em; -} - -.optional { - font-size: 1.3em; -} - -.sig-paren { - font-size: larger; -} - -.versionmodified { - font-style: italic; -} - -.system-message { - background-color: #fda; - padding: 5px; - border: 3px solid red; -} - -.footnote:target { - background-color: #ffa; -} - -.line-block { - display: block; - margin-top: 1em; - margin-bottom: 1em; -} - -.line-block .line-block { - margin-top: 0; - margin-bottom: 0; - margin-left: 1.5em; -} - -.guilabel, .menuselection { - font-family: sans-serif; -} - -.accelerator { - text-decoration: underline; -} - -.classifier { - font-style: oblique; -} - -abbr, acronym { - border-bottom: dotted 1px; - cursor: help; -} - -/* -- code displays 
--------------------------------------------------------- */ - -pre { - overflow: auto; - overflow-y: hidden; /* fixes display issues on Chrome browsers */ -} - -span.pre { - -moz-hyphens: none; - -ms-hyphens: none; - -webkit-hyphens: none; - hyphens: none; -} - -td.linenos pre { - padding: 5px 0px; - border: 0; - background-color: transparent; - color: #aaa; -} - -table.highlighttable { - margin-left: 0.5em; -} - -table.highlighttable td { - padding: 0 0.5em 0 0.5em; -} - -div.code-block-caption { - padding: 2px 5px; - font-size: small; -} - -div.code-block-caption code { - background-color: transparent; -} - -div.code-block-caption + div > div.highlight > pre { - margin-top: 0; -} - -div.code-block-caption span.caption-number { - padding: 0.1em 0.3em; - font-style: italic; -} - -div.code-block-caption span.caption-text { -} - -div.literal-block-wrapper { - padding: 1em 1em 0; -} - -div.literal-block-wrapper div.highlight { - margin: 0; -} - -code.descname { - background-color: transparent; - font-weight: bold; - font-size: 1.2em; -} - -code.descclassname { - background-color: transparent; -} - -code.xref, a code { - background-color: transparent; - font-weight: bold; -} - -h1 code, h2 code, h3 code, h4 code, h5 code, h6 code { - background-color: transparent; -} - -.viewcode-link { - float: right; -} - -.viewcode-back { - float: right; - font-family: sans-serif; -} - -div.viewcode-block:target { - margin: -1px -10px; - padding: 0 10px; -} - -/* -- math display ---------------------------------------------------------- */ - -img.math { - vertical-align: middle; -} - -div.body div.math p { - text-align: center; -} - -span.eqno { - float: right; -} - -span.eqno a.headerlink { - position: relative; - left: 0px; - z-index: 1; -} - -div.math:hover a.headerlink { - visibility: visible; -} - -/* -- printout stylesheet --------------------------------------------------- */ - -@media print { - div.document, - div.documentwrapper, - div.bodywrapper { - margin: 0 !important; 
- width: 100%; - } - - div.sphinxsidebar, - div.related, - div.footer, - #top-link { - display: none; - } -} \ No newline at end of file diff --git a/docs/_static/comment-bright.png b/docs/_static/comment-bright.png deleted file mode 100644 index 15e27ed..0000000 Binary files a/docs/_static/comment-bright.png and /dev/null differ diff --git a/docs/_static/comment-close.png b/docs/_static/comment-close.png deleted file mode 100644 index 4d91bcf..0000000 Binary files a/docs/_static/comment-close.png and /dev/null differ diff --git a/docs/_static/comment.png b/docs/_static/comment.png deleted file mode 100644 index dfbc0cb..0000000 Binary files a/docs/_static/comment.png and /dev/null differ diff --git a/docs/_static/custom.css b/docs/_static/custom.css deleted file mode 100644 index 2a924f1..0000000 --- a/docs/_static/custom.css +++ /dev/null @@ -1 +0,0 @@ -/* This file intentionally left blank. */ diff --git a/docs/_static/diagrams/submissionContainers.graphml b/docs/_static/diagrams/submissionContainers.graphml deleted file mode 100644 index aa64472..0000000 --- a/docs/_static/diagrams/submissionContainers.graphml +++ /dev/null @@ -1,798 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - File management - - - - - - - - - - - - - - - - - - - - - - - - Compilation - - - - - - - - - - - - - - - - - - - - - - - - Quarantine [Volume] - - - - - - - - - - - - - - - - - - - - - - - - Processed [Volume] - - - - - - - - - - - - - - - - - - - - - - - - Submission DB - - - - - - - - - - - - - - - - - - - - - - - - Notification Broker - - - - - - - - - - - - - - - - - - - - - - - - Compiled [Volume] - - - - - - - - - - - - - - - - - - - - - - - - Webhook API - - - - - - - - - - - - - - - - - - - - - - - - Webhook Agent - - - - - - - - - - - - - - - - - - - - - - - - Classifier - - - - - - - - - - - - - - - - - - - - - - - - Fulltext extraction - - - - - - - - - - - - - - - - - - - - - - - - Task queue -[Redis] - - - - - - - - - - - - - - - - - - - - - - - - Webhook Registry 
- - - - - - - - - - - - - - - - - - - - - - - - - - - Core interface svcs - - - - - - - - - - - - - - - - - - - - - Folder 1 - - - - - - - - - - - - - - - - Submission API -[Flask + CoreEvents] - - - - - - - - - - - - - - - - - - - - - - - - Submission UI -[Flask + CoreEvents] - - - - - - - - - - - - - - - - - - - - - - - - Moderation UI -[Flask + CoreEvents] - - - - - - - - - - - - - - - - - - - - - - - - Submission Worker - - - - - - - - - - - - - - - - - - - - - - - - Admin UI -[Flask + CoreEvents] - - - - - - - - - - - - - - - - - - - - - - - - - - Endorsement - - - - - - - - - - - - - - - - - - - - - - - - Fulltext store -[Document store] - - - - - - - - - - - - - - - - - - - - - - - - Classification store -[Document store] - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Consume - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - Produce - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - - diff --git a/docs/_static/diagrams/submissionContainers.png b/docs/_static/diagrams/submissionContainers.png deleted file mode 100644 index d904163..0000000 Binary files a/docs/_static/diagrams/submissionContainers.png and /dev/null differ diff --git a/docs/_static/diagrams/submissionState.graphml b/docs/_static/diagrams/submissionState.graphml deleted file mode 100644 index e0b355a..0000000 --- a/docs/_static/diagrams/submissionState.graphml +++ /dev/null @@ -1,226 +0,0 @@ - - - - - - - - - - - - - - - - - - - - - - - - - Working - - - - - - - - - - - - - - - - - - - Processing - - - - - - - - - - - - - - - - - - - Submitted - - - - - - - - - - - - - - - - - - - Scheduled - - - - - - - - - - - - - - - - - - - Published - - - - - - - - - - - 
- - - - - - - - On Hold - - - - - - - - - - - - - - - - - - Moderator flag applied - - - - - - - - - - - - - Required (meta)data added, source OK - - - - - - - - - - - - - Required pre-moderation checks pass - - - - - - - - - - - - - No flags present at cutoff time - - - - - - - - - - - - - arXiv ID/version and publication timestamp added - - - - - - - - - - - - - Pre-moderation hold applied - - - - - - - - - - - - - Flag(s) cleared by admin - - - - - - - - - - diff --git a/docs/_static/diagrams/submissionState.png b/docs/_static/diagrams/submissionState.png deleted file mode 100644 index eccaf7a..0000000 Binary files a/docs/_static/diagrams/submissionState.png and /dev/null differ diff --git a/docs/_static/doctools.js b/docs/_static/doctools.js deleted file mode 100644 index 5654977..0000000 --- a/docs/_static/doctools.js +++ /dev/null @@ -1,287 +0,0 @@ -/* - * doctools.js - * ~~~~~~~~~~~ - * - * Sphinx JavaScript utilities for all documentation. - * - * :copyright: Copyright 2007-2017 by the Sphinx team, see AUTHORS. - * :license: BSD, see LICENSE for details. - * - */ - -/** - * select a different prefix for underscore - */ -$u = _.noConflict(); - -/** - * make the code below compatible with browsers without - * an installed firebug like debugger -if (!window.console || !console.firebug) { - var names = ["log", "debug", "info", "warn", "error", "assert", "dir", - "dirxml", "group", "groupEnd", "time", "timeEnd", "count", "trace", - "profile", "profileEnd"]; - window.console = {}; - for (var i = 0; i < names.length; ++i) - window.console[names[i]] = function() {}; -} - */ - -/** - * small helper function to urldecode strings - */ -jQuery.urldecode = function(x) { - return decodeURIComponent(x).replace(/\+/g, ' '); -}; - -/** - * small helper function to urlencode strings - */ -jQuery.urlencode = encodeURIComponent; - -/** - * This function returns the parsed url parameters of the - * current request. 
Multiple values per key are supported, - * it will always return arrays of strings for the value parts. - */ -jQuery.getQueryParameters = function(s) { - if (typeof s == 'undefined') - s = document.location.search; - var parts = s.substr(s.indexOf('?') + 1).split('&'); - var result = {}; - for (var i = 0; i < parts.length; i++) { - var tmp = parts[i].split('=', 2); - var key = jQuery.urldecode(tmp[0]); - var value = jQuery.urldecode(tmp[1]); - if (key in result) - result[key].push(value); - else - result[key] = [value]; - } - return result; -}; - -/** - * highlight a given string on a jquery object by wrapping it in - * span elements with the given class name. - */ -jQuery.fn.highlightText = function(text, className) { - function highlight(node) { - if (node.nodeType == 3) { - var val = node.nodeValue; - var pos = val.toLowerCase().indexOf(text); - if (pos >= 0 && !jQuery(node.parentNode).hasClass(className)) { - var span = document.createElement("span"); - span.className = className; - span.appendChild(document.createTextNode(val.substr(pos, text.length))); - node.parentNode.insertBefore(span, node.parentNode.insertBefore( - document.createTextNode(val.substr(pos + text.length)), - node.nextSibling)); - node.nodeValue = val.substr(0, pos); - } - } - else if (!jQuery(node).is("button, select, textarea")) { - jQuery.each(node.childNodes, function() { - highlight(this); - }); - } - } - return this.each(function() { - highlight(this); - }); -}; - -/* - * backward compatibility for jQuery.browser - * This will be supported until firefox bug is fixed. - */ -if (!jQuery.browser) { - jQuery.uaMatch = function(ua) { - ua = ua.toLowerCase(); - - var match = /(chrome)[ \/]([\w.]+)/.exec(ua) || - /(webkit)[ \/]([\w.]+)/.exec(ua) || - /(opera)(?:.*version|)[ \/]([\w.]+)/.exec(ua) || - /(msie) ([\w.]+)/.exec(ua) || - ua.indexOf("compatible") < 0 && /(mozilla)(?:.*? 
rv:([\w.]+)|)/.exec(ua) || - []; - - return { - browser: match[ 1 ] || "", - version: match[ 2 ] || "0" - }; - }; - jQuery.browser = {}; - jQuery.browser[jQuery.uaMatch(navigator.userAgent).browser] = true; -} - -/** - * Small JavaScript module for the documentation. - */ -var Documentation = { - - init : function() { - this.fixFirefoxAnchorBug(); - this.highlightSearchWords(); - this.initIndexTable(); - - }, - - /** - * i18n support - */ - TRANSLATIONS : {}, - PLURAL_EXPR : function(n) { return n == 1 ? 0 : 1; }, - LOCALE : 'unknown', - - // gettext and ngettext don't access this so that the functions - // can safely bound to a different name (_ = Documentation.gettext) - gettext : function(string) { - var translated = Documentation.TRANSLATIONS[string]; - if (typeof translated == 'undefined') - return string; - return (typeof translated == 'string') ? translated : translated[0]; - }, - - ngettext : function(singular, plural, n) { - var translated = Documentation.TRANSLATIONS[singular]; - if (typeof translated == 'undefined') - return (n == 1) ? singular : plural; - return translated[Documentation.PLURALEXPR(n)]; - }, - - addTranslations : function(catalog) { - for (var key in catalog.messages) - this.TRANSLATIONS[key] = catalog.messages[key]; - this.PLURAL_EXPR = new Function('n', 'return +(' + catalog.plural_expr + ')'); - this.LOCALE = catalog.locale; - }, - - /** - * add context elements like header anchor links - */ - addContextElements : function() { - $('div[id] > :header:first').each(function() { - $('\u00B6'). - attr('href', '#' + this.id). - attr('title', _('Permalink to this headline')). - appendTo(this); - }); - $('dt[id]').each(function() { - $('\u00B6'). - attr('href', '#' + this.id). - attr('title', _('Permalink to this definition')). 
- appendTo(this); - }); - }, - - /** - * workaround a firefox stupidity - * see: https://bugzilla.mozilla.org/show_bug.cgi?id=645075 - */ - fixFirefoxAnchorBug : function() { - if (document.location.hash) - window.setTimeout(function() { - document.location.href += ''; - }, 10); - }, - - /** - * highlight the search words provided in the url in the text - */ - highlightSearchWords : function() { - var params = $.getQueryParameters(); - var terms = (params.highlight) ? params.highlight[0].split(/\s+/) : []; - if (terms.length) { - var body = $('div.body'); - if (!body.length) { - body = $('body'); - } - window.setTimeout(function() { - $.each(terms, function() { - body.highlightText(this.toLowerCase(), 'highlighted'); - }); - }, 10); - $('') - .appendTo($('#searchbox')); - } - }, - - /** - * init the domain index toggle buttons - */ - initIndexTable : function() { - var togglers = $('img.toggler').click(function() { - var src = $(this).attr('src'); - var idnum = $(this).attr('id').substr(7); - $('tr.cg-' + idnum).toggle(); - if (src.substr(-9) == 'minus.png') - $(this).attr('src', src.substr(0, src.length-9) + 'plus.png'); - else - $(this).attr('src', src.substr(0, src.length-8) + 'minus.png'); - }).css('display', ''); - if (DOCUMENTATION_OPTIONS.COLLAPSE_INDEX) { - togglers.click(); - } - }, - - /** - * helper function to hide the search marks again - */ - hideSearchWords : function() { - $('#searchbox .highlight-link').fadeOut(300); - $('span.highlighted').removeClass('highlighted'); - }, - - /** - * make the url absolute - */ - makeURL : function(relativeURL) { - return DOCUMENTATION_OPTIONS.URL_ROOT + '/' + relativeURL; - }, - - /** - * get the current relative url - */ - getCurrentURL : function() { - var path = document.location.pathname; - var parts = path.split(/\//); - $.each(DOCUMENTATION_OPTIONS.URL_ROOT.split(/\//), function() { - if (this == '..') - parts.pop(); - }); - var url = parts.join('/'); - return path.substring(url.lastIndexOf('/') + 1, 
path.length - 1); - }, - - initOnKeyListeners: function() { - $(document).keyup(function(event) { - var activeElementType = document.activeElement.tagName; - // don't navigate when in search box or textarea - if (activeElementType !== 'TEXTAREA' && activeElementType !== 'INPUT' && activeElementType !== 'SELECT') { - switch (event.keyCode) { - case 37: // left - var prevHref = $('link[rel="prev"]').prop('href'); - if (prevHref) { - window.location.href = prevHref; - return false; - } - case 39: // right - var nextHref = $('link[rel="next"]').prop('href'); - if (nextHref) { - window.location.href = nextHref; - return false; - } - } - } - }); - } -}; - -// quick alias for translations -_ = Documentation.gettext; - -$(document).ready(function() { - Documentation.init(); -}); \ No newline at end of file diff --git a/docs/_static/documentation_options.js b/docs/_static/documentation_options.js deleted file mode 100644 index fb47b84..0000000 --- a/docs/_static/documentation_options.js +++ /dev/null @@ -1,9 +0,0 @@ -var DOCUMENTATION_OPTIONS = { - URL_ROOT: '', - VERSION: '0.1', - LANGUAGE: 'None', - COLLAPSE_INDEX: false, - FILE_SUFFIX: '.html', - HAS_SOURCE: true, - SOURCELINK_SUFFIX: '.txt' -}; \ No newline at end of file diff --git a/docs/_static/down-pressed.png b/docs/_static/down-pressed.png deleted file mode 100644 index 5756c8c..0000000 Binary files a/docs/_static/down-pressed.png and /dev/null differ diff --git a/docs/_static/down.png b/docs/_static/down.png deleted file mode 100644 index 1b3bdad..0000000 Binary files a/docs/_static/down.png and /dev/null differ diff --git a/docs/_static/file.png b/docs/_static/file.png deleted file mode 100644 index a858a41..0000000 Binary files a/docs/_static/file.png and /dev/null differ diff --git a/docs/_static/jquery-3.1.0.js b/docs/_static/jquery-3.1.0.js deleted file mode 100644 index f2fc274..0000000 --- a/docs/_static/jquery-3.1.0.js +++ /dev/null @@ -1,10074 +0,0 @@ -/*eslint-disable no-unused-vars*/ -/*! 
- * jQuery JavaScript Library v3.1.0 - * https://jquery.com/ - * - * Includes Sizzle.js - * https://sizzlejs.com/ - * - * Copyright jQuery Foundation and other contributors - * Released under the MIT license - * https://jquery.org/license - * - * Date: 2016-07-07T21:44Z - */ -( function( global, factory ) { - - "use strict"; - - if ( typeof module === "object" && typeof module.exports === "object" ) { - - // For CommonJS and CommonJS-like environments where a proper `window` - // is present, execute the factory and get jQuery. - // For environments that do not have a `window` with a `document` - // (such as Node.js), expose a factory as module.exports. - // This accentuates the need for the creation of a real `window`. - // e.g. var jQuery = require("jquery")(window); - // See ticket #14549 for more info. - module.exports = global.document ? - factory( global, true ) : - function( w ) { - if ( !w.document ) { - throw new Error( "jQuery requires a window with a document" ); - } - return factory( w ); - }; - } else { - factory( global ); - } - -// Pass this if window is not defined yet -} )( typeof window !== "undefined" ? window : this, function( window, noGlobal ) { - -// Edge <= 12 - 13+, Firefox <=18 - 45+, IE 10 - 11, Safari 5.1 - 9+, iOS 6 - 9.1 -// throw exceptions when non-strict code (e.g., ASP.NET 4.5) accesses strict mode -// arguments.callee.caller (trac-13335). But as of jQuery 3.0 (2016), strict mode should be common -// enough that all such attempts are guarded in a try block. 
-"use strict"; - -var arr = []; - -var document = window.document; - -var getProto = Object.getPrototypeOf; - -var slice = arr.slice; - -var concat = arr.concat; - -var push = arr.push; - -var indexOf = arr.indexOf; - -var class2type = {}; - -var toString = class2type.toString; - -var hasOwn = class2type.hasOwnProperty; - -var fnToString = hasOwn.toString; - -var ObjectFunctionString = fnToString.call( Object ); - -var support = {}; - - - - function DOMEval( code, doc ) { - doc = doc || document; - - var script = doc.createElement( "script" ); - - script.text = code; - doc.head.appendChild( script ).parentNode.removeChild( script ); - } -/* global Symbol */ -// Defining this global in .eslintrc would create a danger of using the global -// unguarded in another place, it seems safer to define global only for this module - - - -var - version = "3.1.0", - - // Define a local copy of jQuery - jQuery = function( selector, context ) { - - // The jQuery object is actually just the init constructor 'enhanced' - // Need init if jQuery is called (just allow error to be thrown if not included) - return new jQuery.fn.init( selector, context ); - }, - - // Support: Android <=4.0 only - // Make sure we trim BOM and NBSP - rtrim = /^[\s\uFEFF\xA0]+|[\s\uFEFF\xA0]+$/g, - - // Matches dashed string for camelizing - rmsPrefix = /^-ms-/, - rdashAlpha = /-([a-z])/g, - - // Used by jQuery.camelCase as callback to replace() - fcamelCase = function( all, letter ) { - return letter.toUpperCase(); - }; - -jQuery.fn = jQuery.prototype = { - - // The current version of jQuery being used - jquery: version, - - constructor: jQuery, - - // The default length of a jQuery object is 0 - length: 0, - - toArray: function() { - return slice.call( this ); - }, - - // Get the Nth element in the matched element set OR - // Get the whole matched element set as a clean array - get: function( num ) { - return num != null ? - - // Return just the one element from the set - ( num < 0 ? 
this[ num + this.length ] : this[ num ] ) : - - // Return all the elements in a clean array - slice.call( this ); - }, - - // Take an array of elements and push it onto the stack - // (returning the new matched element set) - pushStack: function( elems ) { - - // Build a new jQuery matched element set - var ret = jQuery.merge( this.constructor(), elems ); - - // Add the old object onto the stack (as a reference) - ret.prevObject = this; - - // Return the newly-formed element set - return ret; - }, - - // Execute a callback for every element in the matched set. - each: function( callback ) { - return jQuery.each( this, callback ); - }, - - map: function( callback ) { - return this.pushStack( jQuery.map( this, function( elem, i ) { - return callback.call( elem, i, elem ); - } ) ); - }, - - slice: function() { - return this.pushStack( slice.apply( this, arguments ) ); - }, - - first: function() { - return this.eq( 0 ); - }, - - last: function() { - return this.eq( -1 ); - }, - - eq: function( i ) { - var len = this.length, - j = +i + ( i < 0 ? len : 0 ); - return this.pushStack( j >= 0 && j < len ? [ this[ j ] ] : [] ); - }, - - end: function() { - return this.prevObject || this.constructor(); - }, - - // For internal use only. - // Behaves like an Array's method, not like a jQuery method. 
- push: push, - sort: arr.sort, - splice: arr.splice -}; - -jQuery.extend = jQuery.fn.extend = function() { - var options, name, src, copy, copyIsArray, clone, - target = arguments[ 0 ] || {}, - i = 1, - length = arguments.length, - deep = false; - - // Handle a deep copy situation - if ( typeof target === "boolean" ) { - deep = target; - - // Skip the boolean and the target - target = arguments[ i ] || {}; - i++; - } - - // Handle case when target is a string or something (possible in deep copy) - if ( typeof target !== "object" && !jQuery.isFunction( target ) ) { - target = {}; - } - - // Extend jQuery itself if only one argument is passed - if ( i === length ) { - target = this; - i--; - } - - for ( ; i < length; i++ ) { - - // Only deal with non-null/undefined values - if ( ( options = arguments[ i ] ) != null ) { - - // Extend the base object - for ( name in options ) { - src = target[ name ]; - copy = options[ name ]; - - // Prevent never-ending loop - if ( target === copy ) { - continue; - } - - // Recurse if we're merging plain objects or arrays - if ( deep && copy && ( jQuery.isPlainObject( copy ) || - ( copyIsArray = jQuery.isArray( copy ) ) ) ) { - - if ( copyIsArray ) { - copyIsArray = false; - clone = src && jQuery.isArray( src ) ? src : []; - - } else { - clone = src && jQuery.isPlainObject( src ) ? 
src : {}; - } - - // Never move original objects, clone them - target[ name ] = jQuery.extend( deep, clone, copy ); - - // Don't bring in undefined values - } else if ( copy !== undefined ) { - target[ name ] = copy; - } - } - } - } - - // Return the modified object - return target; -}; - -jQuery.extend( { - - // Unique for each copy of jQuery on the page - expando: "jQuery" + ( version + Math.random() ).replace( /\D/g, "" ), - - // Assume jQuery is ready without the ready module - isReady: true, - - error: function( msg ) { - throw new Error( msg ); - }, - - noop: function() {}, - - isFunction: function( obj ) { - return jQuery.type( obj ) === "function"; - }, - - isArray: Array.isArray, - - isWindow: function( obj ) { - return obj != null && obj === obj.window; - }, - - isNumeric: function( obj ) { - - // As of jQuery 3.0, isNumeric is limited to - // strings and numbers (primitives or objects) - // that can be coerced to finite numbers (gh-2662) - var type = jQuery.type( obj ); - return ( type === "number" || type === "string" ) && - - // parseFloat NaNs numeric-cast false positives ("") - // ...but misinterprets leading-number strings, particularly hex literals ("0x...") - // subtraction forces infinities to NaN - !isNaN( obj - parseFloat( obj ) ); - }, - - isPlainObject: function( obj ) { - var proto, Ctor; - - // Detect obvious negatives - // Use toString instead of jQuery.type to catch host objects - if ( !obj || toString.call( obj ) !== "[object Object]" ) { - return false; - } - - proto = getProto( obj ); - - // Objects with no prototype (e.g., `Object.create( null )`) are plain - if ( !proto ) { - return true; - } - - // Objects with prototype are plain iff they were constructed by a global Object function - Ctor = hasOwn.call( proto, "constructor" ) && proto.constructor; - return typeof Ctor === "function" && fnToString.call( Ctor ) === ObjectFunctionString; - }, - - isEmptyObject: function( obj ) { - - /* eslint-disable no-unused-vars */ - // See 
https://github.com/eslint/eslint/issues/6125 - var name; - - for ( name in obj ) { - return false; - } - return true; - }, - - type: function( obj ) { - if ( obj == null ) { - return obj + ""; - } - - // Support: Android <=2.3 only (functionish RegExp) - return typeof obj === "object" || typeof obj === "function" ? - class2type[ toString.call( obj ) ] || "object" : - typeof obj; - }, - - // Evaluates a script in a global context - globalEval: function( code ) { - DOMEval( code ); - }, - - // Convert dashed to camelCase; used by the css and data modules - // Support: IE <=9 - 11, Edge 12 - 13 - // Microsoft forgot to hump their vendor prefix (#9572) - camelCase: function( string ) { - return string.replace( rmsPrefix, "ms-" ).replace( rdashAlpha, fcamelCase ); - }, - - nodeName: function( elem, name ) { - return elem.nodeName && elem.nodeName.toLowerCase() === name.toLowerCase(); - }, - - each: function( obj, callback ) { - var length, i = 0; - - if ( isArrayLike( obj ) ) { - length = obj.length; - for ( ; i < length; i++ ) { - if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { - break; - } - } - } else { - for ( i in obj ) { - if ( callback.call( obj[ i ], i, obj[ i ] ) === false ) { - break; - } - } - } - - return obj; - }, - - // Support: Android <=4.0 only - trim: function( text ) { - return text == null ? - "" : - ( text + "" ).replace( rtrim, "" ); - }, - - // results is for internal usage only - makeArray: function( arr, results ) { - var ret = results || []; - - if ( arr != null ) { - if ( isArrayLike( Object( arr ) ) ) { - jQuery.merge( ret, - typeof arr === "string" ? - [ arr ] : arr - ); - } else { - push.call( ret, arr ); - } - } - - return ret; - }, - - inArray: function( elem, arr, i ) { - return arr == null ? 
-1 : indexOf.call( arr, elem, i ); - }, - - // Support: Android <=4.0 only, PhantomJS 1 only - // push.apply(_, arraylike) throws on ancient WebKit - merge: function( first, second ) { - var len = +second.length, - j = 0, - i = first.length; - - for ( ; j < len; j++ ) { - first[ i++ ] = second[ j ]; - } - - first.length = i; - - return first; - }, - - grep: function( elems, callback, invert ) { - var callbackInverse, - matches = [], - i = 0, - length = elems.length, - callbackExpect = !invert; - - // Go through the array, only saving the items - // that pass the validator function - for ( ; i < length; i++ ) { - callbackInverse = !callback( elems[ i ], i ); - if ( callbackInverse !== callbackExpect ) { - matches.push( elems[ i ] ); - } - } - - return matches; - }, - - // arg is for internal usage only - map: function( elems, callback, arg ) { - var length, value, - i = 0, - ret = []; - - // Go through the array, translating each of the items to their new values - if ( isArrayLike( elems ) ) { - length = elems.length; - for ( ; i < length; i++ ) { - value = callback( elems[ i ], i, arg ); - - if ( value != null ) { - ret.push( value ); - } - } - - // Go through every key on the object, - } else { - for ( i in elems ) { - value = callback( elems[ i ], i, arg ); - - if ( value != null ) { - ret.push( value ); - } - } - } - - // Flatten any nested arrays - return concat.apply( [], ret ); - }, - - // A global GUID counter for objects - guid: 1, - - // Bind a function to a context, optionally partially applying any - // arguments. - proxy: function( fn, context ) { - var tmp, args, proxy; - - if ( typeof context === "string" ) { - tmp = fn[ context ]; - context = fn; - fn = tmp; - } - - // Quick check to determine if target is callable, in the spec - // this throws a TypeError, but we will just return undefined. 
- if ( !jQuery.isFunction( fn ) ) { - return undefined; - } - - // Simulated bind - args = slice.call( arguments, 2 ); - proxy = function() { - return fn.apply( context || this, args.concat( slice.call( arguments ) ) ); - }; - - // Set the guid of unique handler to the same of original handler, so it can be removed - proxy.guid = fn.guid = fn.guid || jQuery.guid++; - - return proxy; - }, - - now: Date.now, - - // jQuery.support is not used in Core but other projects attach their - // properties to it so it needs to exist. - support: support -} ); - -if ( typeof Symbol === "function" ) { - jQuery.fn[ Symbol.iterator ] = arr[ Symbol.iterator ]; -} - -// Populate the class2type map -jQuery.each( "Boolean Number String Function Array Date RegExp Object Error Symbol".split( " " ), -function( i, name ) { - class2type[ "[object " + name + "]" ] = name.toLowerCase(); -} ); - -function isArrayLike( obj ) { - - // Support: real iOS 8.2 only (not reproducible in simulator) - // `in` check used to prevent JIT error (gh-2145) - // hasOwn isn't used here due to false negatives - // regarding Nodelist length in IE - var length = !!obj && "length" in obj && obj.length, - type = jQuery.type( obj ); - - if ( type === "function" || jQuery.isWindow( obj ) ) { - return false; - } - - return type === "array" || length === 0 || - typeof length === "number" && length > 0 && ( length - 1 ) in obj; -} -var Sizzle = -/*! 
- * Sizzle CSS Selector Engine v2.3.0 - * https://sizzlejs.com/ - * - * Copyright jQuery Foundation and other contributors - * Released under the MIT license - * http://jquery.org/license - * - * Date: 2016-01-04 - */ -(function( window ) { - -var i, - support, - Expr, - getText, - isXML, - tokenize, - compile, - select, - outermostContext, - sortInput, - hasDuplicate, - - // Local document vars - setDocument, - document, - docElem, - documentIsHTML, - rbuggyQSA, - rbuggyMatches, - matches, - contains, - - // Instance-specific data - expando = "sizzle" + 1 * new Date(), - preferredDoc = window.document, - dirruns = 0, - done = 0, - classCache = createCache(), - tokenCache = createCache(), - compilerCache = createCache(), - sortOrder = function( a, b ) { - if ( a === b ) { - hasDuplicate = true; - } - return 0; - }, - - // Instance methods - hasOwn = ({}).hasOwnProperty, - arr = [], - pop = arr.pop, - push_native = arr.push, - push = arr.push, - slice = arr.slice, - // Use a stripped-down indexOf as it's faster than native - // https://jsperf.com/thor-indexof-vs-for/5 - indexOf = function( list, elem ) { - var i = 0, - len = list.length; - for ( ; i < len; i++ ) { - if ( list[i] === elem ) { - return i; - } - } - return -1; - }, - - booleans = "checked|selected|async|autofocus|autoplay|controls|defer|disabled|hidden|ismap|loop|multiple|open|readonly|required|scoped", - - // Regular expressions - - // http://www.w3.org/TR/css3-selectors/#whitespace - whitespace = "[\\x20\\t\\r\\n\\f]", - - // http://www.w3.org/TR/CSS21/syndata.html#value-def-identifier - identifier = "(?:\\\\.|[\\w-]|[^\0-\\xa0])+", - - // Attribute selectors: http://www.w3.org/TR/selectors/#attribute-selectors - attributes = "\\[" + whitespace + "*(" + identifier + ")(?:" + whitespace + - // Operator (capture 2) - "*([*^$|!~]?=)" + whitespace + - // "Attribute values must be CSS identifiers [capture 5] or strings [capture 3 or capture 4]" - "*(?:'((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\"|(" 
+ identifier + "))|)" + whitespace + - "*\\]", - - pseudos = ":(" + identifier + ")(?:\\((" + - // To reduce the number of selectors needing tokenize in the preFilter, prefer arguments: - // 1. quoted (capture 3; capture 4 or capture 5) - "('((?:\\\\.|[^\\\\'])*)'|\"((?:\\\\.|[^\\\\\"])*)\")|" + - // 2. simple (capture 6) - "((?:\\\\.|[^\\\\()[\\]]|" + attributes + ")*)|" + - // 3. anything else (capture 2) - ".*" + - ")\\)|)", - - // Leading and non-escaped trailing whitespace, capturing some non-whitespace characters preceding the latter - rwhitespace = new RegExp( whitespace + "+", "g" ), - rtrim = new RegExp( "^" + whitespace + "+|((?:^|[^\\\\])(?:\\\\.)*)" + whitespace + "+$", "g" ), - - rcomma = new RegExp( "^" + whitespace + "*," + whitespace + "*" ), - rcombinators = new RegExp( "^" + whitespace + "*([>+~]|" + whitespace + ")" + whitespace + "*" ), - - rattributeQuotes = new RegExp( "=" + whitespace + "*([^\\]'\"]*?)" + whitespace + "*\\]", "g" ), - - rpseudo = new RegExp( pseudos ), - ridentifier = new RegExp( "^" + identifier + "$" ), - - matchExpr = { - "ID": new RegExp( "^#(" + identifier + ")" ), - "CLASS": new RegExp( "^\\.(" + identifier + ")" ), - "TAG": new RegExp( "^(" + identifier + "|[*])" ), - "ATTR": new RegExp( "^" + attributes ), - "PSEUDO": new RegExp( "^" + pseudos ), - "CHILD": new RegExp( "^:(only|first|last|nth|nth-last)-(child|of-type)(?:\\(" + whitespace + - "*(even|odd|(([+-]|)(\\d*)n|)" + whitespace + "*(?:([+-]|)" + whitespace + - "*(\\d+)|))" + whitespace + "*\\)|)", "i" ), - "bool": new RegExp( "^(?:" + booleans + ")$", "i" ), - // For use in libraries implementing .is() - // We use this for POS matching in `select` - "needsContext": new RegExp( "^" + whitespace + "*[>+~]|:(even|odd|eq|gt|lt|nth|first|last)(?:\\(" + - whitespace + "*((?:-\\d)?\\d*)" + whitespace + "*\\)|)(?=[^-]|$)", "i" ) - }, - - rinputs = /^(?:input|select|textarea|button)$/i, - rheader = /^h\d$/i, - - rnative = /^[^{]+\{\s*\[native \w/, - - // 
Easily-parseable/retrievable ID or TAG or CLASS selectors - rquickExpr = /^(?:#([\w-]+)|(\w+)|\.([\w-]+))$/, - - rsibling = /[+~]/, - - // CSS escapes - // http://www.w3.org/TR/CSS21/syndata.html#escaped-characters - runescape = new RegExp( "\\\\([\\da-f]{1,6}" + whitespace + "?|(" + whitespace + ")|.)", "ig" ), - funescape = function( _, escaped, escapedWhitespace ) { - var high = "0x" + escaped - 0x10000; - // NaN means non-codepoint - // Support: Firefox<24 - // Workaround erroneous numeric interpretation of +"0x" - return high !== high || escapedWhitespace ? - escaped : - high < 0 ? - // BMP codepoint - String.fromCharCode( high + 0x10000 ) : - // Supplemental Plane codepoint (surrogate pair) - String.fromCharCode( high >> 10 | 0xD800, high & 0x3FF | 0xDC00 ); - }, - - // CSS string/identifier serialization - // https://drafts.csswg.org/cssom/#common-serializing-idioms - rcssescape = /([\0-\x1f\x7f]|^-?\d)|^-$|[^\x80-\uFFFF\w-]/g, - fcssescape = function( ch, asCodePoint ) { - if ( asCodePoint ) { - - // U+0000 NULL becomes U+FFFD REPLACEMENT CHARACTER - if ( ch === "\0" ) { - return "\uFFFD"; - } - - // Control characters and (dependent upon position) numbers get escaped as code points - return ch.slice( 0, -1 ) + "\\" + ch.charCodeAt( ch.length - 1 ).toString( 16 ) + " "; - } - - // Other potentially-special ASCII characters get backslash-escaped - return "\\" + ch; - }, - - // Used for iframes - // See setDocument() - // Removing the function wrapper causes a "Permission Denied" - // error in IE - unloadHandler = function() { - setDocument(); - }, - - disabledAncestor = addCombinator( - function( elem ) { - return elem.disabled === true; - }, - { dir: "parentNode", next: "legend" } - ); - -// Optimize for push.apply( _, NodeList ) -try { - push.apply( - (arr = slice.call( preferredDoc.childNodes )), - preferredDoc.childNodes - ); - // Support: Android<4.0 - // Detect silently failing push.apply - arr[ preferredDoc.childNodes.length ].nodeType; -} catch ( e ) 
{ - push = { apply: arr.length ? - - // Leverage slice if possible - function( target, els ) { - push_native.apply( target, slice.call(els) ); - } : - - // Support: IE<9 - // Otherwise append directly - function( target, els ) { - var j = target.length, - i = 0; - // Can't trust NodeList.length - while ( (target[j++] = els[i++]) ) {} - target.length = j - 1; - } - }; -} - -function Sizzle( selector, context, results, seed ) { - var m, i, elem, nid, match, groups, newSelector, - newContext = context && context.ownerDocument, - - // nodeType defaults to 9, since context defaults to document - nodeType = context ? context.nodeType : 9; - - results = results || []; - - // Return early from calls with invalid selector or context - if ( typeof selector !== "string" || !selector || - nodeType !== 1 && nodeType !== 9 && nodeType !== 11 ) { - - return results; - } - - // Try to shortcut find operations (as opposed to filters) in HTML documents - if ( !seed ) { - - if ( ( context ? context.ownerDocument || context : preferredDoc ) !== document ) { - setDocument( context ); - } - context = context || document; - - if ( documentIsHTML ) { - - // If the selector is sufficiently simple, try using a "get*By*" DOM method - // (excepting DocumentFragment context, where the methods don't exist) - if ( nodeType !== 11 && (match = rquickExpr.exec( selector )) ) { - - // ID selector - if ( (m = match[1]) ) { - - // Document context - if ( nodeType === 9 ) { - if ( (elem = context.getElementById( m )) ) { - - // Support: IE, Opera, Webkit - // TODO: identify versions - // getElementById can match elements by name instead of ID - if ( elem.id === m ) { - results.push( elem ); - return results; - } - } else { - return results; - } - - // Element context - } else { - - // Support: IE, Opera, Webkit - // TODO: identify versions - // getElementById can match elements by name instead of ID - if ( newContext && (elem = newContext.getElementById( m )) && - contains( context, elem ) && - elem.id 
=== m ) { - - results.push( elem ); - return results; - } - } - - // Type selector - } else if ( match[2] ) { - push.apply( results, context.getElementsByTagName( selector ) ); - return results; - - // Class selector - } else if ( (m = match[3]) && support.getElementsByClassName && - context.getElementsByClassName ) { - - push.apply( results, context.getElementsByClassName( m ) ); - return results; - } - } - - // Take advantage of querySelectorAll - if ( support.qsa && - !compilerCache[ selector + " " ] && - (!rbuggyQSA || !rbuggyQSA.test( selector )) ) { - - if ( nodeType !== 1 ) { - newContext = context; - newSelector = selector; - - // qSA looks outside Element context, which is not what we want - // Thanks to Andrew Dupont for this workaround technique - // Support: IE <=8 - // Exclude object elements - } else if ( context.nodeName.toLowerCase() !== "object" ) { - - // Capture the context ID, setting it first if necessary - if ( (nid = context.getAttribute( "id" )) ) { - nid = nid.replace( rcssescape, fcssescape ); - } else { - context.setAttribute( "id", (nid = expando) ); - } - - // Prefix every selector in the list - groups = tokenize( selector ); - i = groups.length; - while ( i-- ) { - groups[i] = "#" + nid + " " + toSelector( groups[i] ); - } - newSelector = groups.join( "," ); - - // Expand context for sibling selectors - newContext = rsibling.test( selector ) && testContext( context.parentNode ) || - context; - } - - if ( newSelector ) { - try { - push.apply( results, - newContext.querySelectorAll( newSelector ) - ); - return results; - } catch ( qsaError ) { - } finally { - if ( nid === expando ) { - context.removeAttribute( "id" ); - } - } - } - } - } - } - - // All others - return select( selector.replace( rtrim, "$1" ), context, results, seed ); -} - -/** - * Create key-value caches of limited size - * @returns {function(string, object)} Returns the Object data after storing it on itself with - * property name the (space-suffixed) string and (if the 
cache is larger than Expr.cacheLength) - * deleting the oldest entry - */ -function createCache() { - var keys = []; - - function cache( key, value ) { - // Use (key + " ") to avoid collision with native prototype properties (see Issue #157) - if ( keys.push( key + " " ) > Expr.cacheLength ) { - // Only keep the most recent entries - delete cache[ keys.shift() ]; - } - return (cache[ key + " " ] = value); - } - return cache; -} - -/** - * Mark a function for special use by Sizzle - * @param {Function} fn The function to mark - */ -function markFunction( fn ) { - fn[ expando ] = true; - return fn; -} - -/** - * Support testing using an element - * @param {Function} fn Passed the created element and returns a boolean result - */ -function assert( fn ) { - var el = document.createElement("fieldset"); - - try { - return !!fn( el ); - } catch (e) { - return false; - } finally { - // Remove from its parent by default - if ( el.parentNode ) { - el.parentNode.removeChild( el ); - } - // release memory in IE - el = null; - } -} - -/** - * Adds the same handler for all of the specified attrs - * @param {String} attrs Pipe-separated list of attributes - * @param {Function} handler The method that will be applied - */ -function addHandle( attrs, handler ) { - var arr = attrs.split("|"), - i = arr.length; - - while ( i-- ) { - Expr.attrHandle[ arr[i] ] = handler; - } -} - -/** - * Checks document order of two siblings - * @param {Element} a - * @param {Element} b - * @returns {Number} Returns less than 0 if a precedes b, greater than 0 if a follows b - */ -function siblingCheck( a, b ) { - var cur = b && a, - diff = cur && a.nodeType === 1 && b.nodeType === 1 && - a.sourceIndex - b.sourceIndex; - - // Use IE sourceIndex if available on both nodes - if ( diff ) { - return diff; - } - - // Check if b follows a - if ( cur ) { - while ( (cur = cur.nextSibling) ) { - if ( cur === b ) { - return -1; - } - } - } - - return a ? 
1 : -1; -} - -/** - * Returns a function to use in pseudos for input types - * @param {String} type - */ -function createInputPseudo( type ) { - return function( elem ) { - var name = elem.nodeName.toLowerCase(); - return name === "input" && elem.type === type; - }; -} - -/** - * Returns a function to use in pseudos for buttons - * @param {String} type - */ -function createButtonPseudo( type ) { - return function( elem ) { - var name = elem.nodeName.toLowerCase(); - return (name === "input" || name === "button") && elem.type === type; - }; -} - -/** - * Returns a function to use in pseudos for :enabled/:disabled - * @param {Boolean} disabled true for :disabled; false for :enabled - */ -function createDisabledPseudo( disabled ) { - // Known :disabled false positives: - // IE: *[disabled]:not(button, input, select, textarea, optgroup, option, menuitem, fieldset) - // not IE: fieldset[disabled] > legend:nth-of-type(n+2) :can-disable - return function( elem ) { - - // Check form elements and option elements for explicit disabling - return "label" in elem && elem.disabled === disabled || - "form" in elem && elem.disabled === disabled || - - // Check non-disabled form elements for fieldset[disabled] ancestors - "form" in elem && elem.disabled === false && ( - // Support: IE6-11+ - // Ancestry is covered for us - elem.isDisabled === disabled || - - // Otherwise, assume any non-