diff --git a/.gitignore b/.gitignore new file mode 100644 index 0000000..2dc53ca --- /dev/null +++ b/.gitignore @@ -0,0 +1,160 @@ +# Byte-compiled / optimized / DLL files +__pycache__/ +*.py[cod] +*$py.class + +# C extensions +*.so + +# Distribution / packaging +.Python +build/ +develop-eggs/ +dist/ +downloads/ +eggs/ +.eggs/ +lib/ +lib64/ +parts/ +sdist/ +var/ +wheels/ +share/python-wheels/ +*.egg-info/ +.installed.cfg +*.egg +MANIFEST + +# PyInstaller +# Usually these files are written by a python script from a template +# before PyInstaller builds the exe, so as to inject date/other infos into it. +*.manifest +*.spec + +# Installer logs +pip-log.txt +pip-delete-this-directory.txt + +# Unit test / coverage reports +htmlcov/ +.tox/ +.nox/ +.coverage +.coverage.* +.cache +nosetests.xml +coverage.xml +*.cover +*.py,cover +.hypothesis/ +.pytest_cache/ +cover/ + +# Translations +*.mo +*.pot + +# Django stuff: +*.log +local_settings.py +db.sqlite3 +db.sqlite3-journal + +# Flask stuff: +instance/ +.webassets-cache + +# Scrapy stuff: +.scrapy + +# Sphinx documentation +docs/_build/ + +# PyBuilder +.pybuilder/ +target/ + +# Jupyter Notebook +.ipynb_checkpoints + +# IPython +profile_default/ +ipython_config.py + +# pyenv +# For a library or package, you might want to ignore these files since the code is +# intended to run in multiple environments; otherwise, check them in: +# .python-version + +# pipenv +# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control. +# However, in case of collaboration, if having platform-specific dependencies or dependencies +# having no cross-platform support, pipenv may install dependencies that don't work, or not +# install all needed dependencies. +#Pipfile.lock + +# poetry +# Similar to Pipfile.lock, it is generally recommended to include poetry.lock in version control. +# This is especially recommended for binary packages to ensure reproducibility, and is more +# commonly ignored for libraries. 
+# https://python-poetry.org/docs/basic-usage/#commit-your-poetrylock-file-to-version-control +#poetry.lock + +# pdm +# Similar to Pipfile.lock, it is generally recommended to include pdm.lock in version control. +#pdm.lock +# pdm stores project-wide configurations in .pdm.toml, but it is recommended to not include it +# in version control. +# https://pdm.fming.dev/#use-with-ide +.pdm.toml + +# PEP 582; used by e.g. github.com/David-OConnor/pyflow and github.com/pdm-project/pdm +__pypackages__/ + +# Celery stuff +celerybeat-schedule +celerybeat.pid + +# SageMath parsed files +*.sage.py + +# Environments +.env +.venv +env/ +venv/ +ENV/ +env.bak/ +venv.bak/ + +# Spyder project settings +.spyderproject +.spyproject + +# Rope project settings +.ropeproject + +# mkdocs documentation +/site + +# mypy +.mypy_cache/ +.dmypy.json +dmypy.json + +# Pyre type checker +.pyre/ + +# pytype static type analyzer +.pytype/ + +# Cython debug symbols +cython_debug/ + +# PyCharm +# JetBrains specific template is maintained in a separate JetBrains.gitignore that can +# be found at https://github.com/github/gitignore/blob/main/Global/JetBrains.gitignore +# and can be added to the global gitignore or merged into this file. For a more nuclear +# option (not recommended) you can uncomment the following to ignore the entire idea folder. +.idea/ diff --git a/LICENSE b/LICENSE new file mode 100644 index 0000000..261eeb9 --- /dev/null +++ b/LICENSE @@ -0,0 +1,201 @@ + Apache License + Version 2.0, January 2004 + http://www.apache.org/licenses/ + + TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION + + 1. Definitions. + + "License" shall mean the terms and conditions for use, reproduction, + and distribution as defined by Sections 1 through 9 of this document. + + "Licensor" shall mean the copyright owner or entity authorized by + the copyright owner that is granting the License. 
+ + "Legal Entity" shall mean the union of the acting entity and all + other entities that control, are controlled by, or are under common + control with that entity. For the purposes of this definition, + "control" means (i) the power, direct or indirect, to cause the + direction or management of such entity, whether by contract or + otherwise, or (ii) ownership of fifty percent (50%) or more of the + outstanding shares, or (iii) beneficial ownership of such entity. + + "You" (or "Your") shall mean an individual or Legal Entity + exercising permissions granted by this License. + + "Source" form shall mean the preferred form for making modifications, + including but not limited to software source code, documentation + source, and configuration files. + + "Object" form shall mean any form resulting from mechanical + transformation or translation of a Source form, including but + not limited to compiled object code, generated documentation, + and conversions to other media types. + + "Work" shall mean the work of authorship, whether in Source or + Object form, made available under the License, as indicated by a + copyright notice that is included in or attached to the work + (an example is provided in the Appendix below). + + "Derivative Works" shall mean any work, whether in Source or Object + form, that is based on (or derived from) the Work and for which the + editorial revisions, annotations, elaborations, or other modifications + represent, as a whole, an original work of authorship. For the purposes + of this License, Derivative Works shall not include works that remain + separable from, or merely link (or bind by name) to the interfaces of, + the Work and Derivative Works thereof. 
+ + "Contribution" shall mean any work of authorship, including + the original version of the Work and any modifications or additions + to that Work or Derivative Works thereof, that is intentionally + submitted to Licensor for inclusion in the Work by the copyright owner + or by an individual or Legal Entity authorized to submit on behalf of + the copyright owner. For the purposes of this definition, "submitted" + means any form of electronic, verbal, or written communication sent + to the Licensor or its representatives, including but not limited to + communication on electronic mailing lists, source code control systems, + and issue tracking systems that are managed by, or on behalf of, the + Licensor for the purpose of discussing and improving the Work, but + excluding communication that is conspicuously marked or otherwise + designated in writing by the copyright owner as "Not a Contribution." + + "Contributor" shall mean Licensor and any individual or Legal Entity + on behalf of whom a Contribution has been received by Licensor and + subsequently incorporated within the Work. + + 2. Grant of Copyright License. Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + copyright license to reproduce, prepare Derivative Works of, + publicly display, publicly perform, sublicense, and distribute the + Work and such Derivative Works in Source or Object form. + + 3. Grant of Patent License. 
Subject to the terms and conditions of + this License, each Contributor hereby grants to You a perpetual, + worldwide, non-exclusive, no-charge, royalty-free, irrevocable + (except as stated in this section) patent license to make, have made, + use, offer to sell, sell, import, and otherwise transfer the Work, + where such license applies only to those patent claims licensable + by such Contributor that are necessarily infringed by their + Contribution(s) alone or by combination of their Contribution(s) + with the Work to which such Contribution(s) was submitted. If You + institute patent litigation against any entity (including a + cross-claim or counterclaim in a lawsuit) alleging that the Work + or a Contribution incorporated within the Work constitutes direct + or contributory patent infringement, then any patent licenses + granted to You under this License for that Work shall terminate + as of the date such litigation is filed. + + 4. Redistribution. You may reproduce and distribute copies of the + Work or Derivative Works thereof in any medium, with or without + modifications, and in Source or Object form, provided that You + meet the following conditions: + + (a) You must give any other recipients of the Work or + Derivative Works a copy of this License; and + + (b) You must cause any modified files to carry prominent notices + stating that You changed the files; and + + (c) You must retain, in the Source form of any Derivative Works + that You distribute, all copyright, patent, trademark, and + attribution notices from the Source form of the Work, + excluding those notices that do not pertain to any part of + the Derivative Works; and + + (d) If the Work includes a "NOTICE" text file as part of its + distribution, then any Derivative Works that You distribute must + include a readable copy of the attribution notices contained + within such NOTICE file, excluding those notices that do not + pertain to any part of the Derivative Works, in at least one + of 
the following places: within a NOTICE text file distributed + as part of the Derivative Works; within the Source form or + documentation, if provided along with the Derivative Works; or, + within a display generated by the Derivative Works, if and + wherever such third-party notices normally appear. The contents + of the NOTICE file are for informational purposes only and + do not modify the License. You may add Your own attribution + notices within Derivative Works that You distribute, alongside + or as an addendum to the NOTICE text from the Work, provided + that such additional attribution notices cannot be construed + as modifying the License. + + You may add Your own copyright statement to Your modifications and + may provide additional or different license terms and conditions + for use, reproduction, or distribution of Your modifications, or + for any such Derivative Works as a whole, provided Your use, + reproduction, and distribution of the Work otherwise complies with + the conditions stated in this License. + + 5. Submission of Contributions. Unless You explicitly state otherwise, + any Contribution intentionally submitted for inclusion in the Work + by You to the Licensor shall be under the terms and conditions of + this License, without any additional terms or conditions. + Notwithstanding the above, nothing herein shall supersede or modify + the terms of any separate license agreement you may have executed + with Licensor regarding such Contributions. + + 6. Trademarks. This License does not grant permission to use the trade + names, trademarks, service marks, or product names of the Licensor, + except as required for reasonable and customary use in describing the + origin of the Work and reproducing the content of the NOTICE file. + + 7. Disclaimer of Warranty. 
Unless required by applicable law or + agreed to in writing, Licensor provides the Work (and each + Contributor provides its Contributions) on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or + implied, including, without limitation, any warranties or conditions + of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A + PARTICULAR PURPOSE. You are solely responsible for determining the + appropriateness of using or redistributing the Work and assume any + risks associated with Your exercise of permissions under this License. + + 8. Limitation of Liability. In no event and under no legal theory, + whether in tort (including negligence), contract, or otherwise, + unless required by applicable law (such as deliberate and grossly + negligent acts) or agreed to in writing, shall any Contributor be + liable to You for damages, including any direct, indirect, special, + incidental, or consequential damages of any character arising as a + result of this License or out of the use or inability to use the + Work (including but not limited to damages for loss of goodwill, + work stoppage, computer failure or malfunction, or any and all + other commercial damages or losses), even if such Contributor + has been advised of the possibility of such damages. + + 9. Accepting Warranty or Additional Liability. While redistributing + the Work or Derivative Works thereof, You may choose to offer, + and charge a fee for, acceptance of support, warranty, indemnity, + or other liability obligations and/or rights consistent with this + License. However, in accepting such obligations, You may act only + on Your own behalf and on Your sole responsibility, not on behalf + of any other Contributor, and only if You agree to indemnify, + defend, and hold each Contributor harmless for any liability + incurred by, or claims asserted against, such Contributor by reason + of your accepting any such warranty or additional liability. 
+ + END OF TERMS AND CONDITIONS + + APPENDIX: How to apply the Apache License to your work. + + To apply the Apache License to your work, attach the following + boilerplate notice, with the fields enclosed by brackets "[]" + replaced with your own identifying information. (Don't include + the brackets!) The text should be enclosed in the appropriate + comment syntax for the file format. We also recommend that a + file or class name and description of purpose be included on the + same "printed page" as the copyright notice for easier + identification within third-party archives. + + Copyright [yyyy] [name of copyright owner] + + Licensed under the Apache License, Version 2.0 (the "License"); + you may not use this file except in compliance with the License. + You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + + Unless required by applicable law or agreed to in writing, software + distributed under the License is distributed on an "AS IS" BASIS, + WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + See the License for the specific language governing permissions and + limitations under the License. diff --git a/README.md b/README.md index 6493081..f6847c5 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,300 @@ # immudb_wrapper -The wrapper around binary `immuclient` from project Codenotary. +The wrapper around the SDK client `immudb-py` from project Codenotary, which expands the functionality of the original client with additional functions. 
+ +## Table of Contents + +- [Requirements](#requirements) +- [Installation](#installation) +- [Usage](#usage) +- [Contribution](#contribution) + +## Requirements + +- python >= 3.7 +- immudb-py >= 1.4.0 +- GitPython >= 3.1.20 + +## Installation + +You can easily install `immudb_wrapper` into your environment with the following command: + +``` +pip install git+https://git.almalinux.org/danfimov/immudb_wrapper.git@#egg=immudb_wrapper +``` + +To run the `immudb` instance locally, you can use the options from `immudb` [documentation](https://docs.immudb.io/master/running/download.html). + +If you want to use the `immudb` in `docker-compose.yml`, you can add the following in your compose file: + +``` + immudb: + image: codenotary/immudb:latest + ports: + - 3322:3322 + - 9497:9497 + volumes: + - "../volumes/immudb/data:/var/lib/immudb" + - "../volumes/immudb/config:/etc/immudb" + - "../volumes/immudb/logs:/var/log/immudb" +``` + +## Usage + +### Client initialization + +```python3 +client = ImmudbWrapper( + username="user", + password="password", + database="database", +) +``` + +### File notarization + +This method calculates the file hash and file size and inserts them with the user's metadata (if provided), into the database. 
+ +```python3 +response = client.notarize_file( + "./hello_world.sh", + user_metadata={ + "foo": "bar", + }, +) +print(response) +{ + 'id': 1, + 'key': '4db5767d4bf4221a5656b163ef1bae833095255f80d1ad5be21dfef84caf4126', + 'value': { + 'Name': 'hello_world.sh', + 'Kind': 'file', + 'Size': '2.62 KB', + 'Hash': '4db5767d4bf4221a5656b163ef1bae833095255f80d1ad5be21dfef84caf4126', + 'Metadata': { + 'sbom_api_ver': '0.2', + 'foo': 'bar', + }, + }, + 'timestamp': 1690794033, + 'verified': True, + 'refkey': None, + 'revision': 1, +} +``` + +### Git repo notarization + +This method extracts the git metadata from a provided git directory, calculates the hash of the extracted metadata and inserts that metadata, together with the user's metadata (if provided), into the database. Fails with an `InvalidGitRepositoryError` when given a non-git directory. + +```python3 +response = client.notarize_git_repo( + "./immudb_wrapper/", + user_metadata={ + "foo": "bar", + }, +) +print(response) +{ + 'id': 2, + 'key': 'a87f7a948900e04812c095fb457e994926fc28c2a789471521fcc076cc4d8658', + 'value': { + 'Name': 'git@git.almalinux.org:danfimov/immudb_wrapper.git@c093e0f', + 'Kind': 'git', + 'Size': '30.52 KB', + 'Hash': 'a87f7a948900e04812c095fb457e994926fc28c2a789471521fcc076cc4d8658', + 'Metadata': { + 'git': { + 'Author': { + 'Email': 'anfimovdan@gmail.com', + 'Name': 'Daniil Anfimov', + 'When': '2023-07-22T12:53:22+0200', + }, + 'Commit': 'c093e0f468c2810f76d4c09c340c58380bd965b1', + 'Committer': { + 'Email': 'anfimovdan@gmail.com', + 'Name': 'Daniil Anfimov', + 'When': '2023-07-22T12:53:22+0200', + }, + 'Message': 'Initial commit\n', + 'PGPSignature': '', + 'Parents': [], + 'Tree': '4526550b9c6b77e7e10f6e57dfecfe0504c5166f', + }, + 'sbom_api_ver': '0.2', + 'foo': 'bar', + }, + }, + 'timestamp': 1690794260, + 'verified': True, + 'refkey': None, + 'revision': 1, +} +``` + +### Git repo authentication + +This method extracts the git metadata from a provided git directory, calculates the hash of the 
extracted metadata, and looks up the metadata of that hash in the database. Returns a dict with an error if metadata doesn't exist in the database. + +```python3 +response = client.authenticate_git_repo("./immudb_wrapper/") +print(response) +{ + 'id': 2, + 'key': 'a87f7a948900e04812c095fb457e994926fc28c2a789471521fcc076cc4d8658', + 'value': { + 'Name': 'git@git.almalinux.org:danfimov/immudb_wrapper.git@c093e0f', + 'Kind': 'git', + 'Size': '30.52 KB', + 'Hash': 'a87f7a948900e04812c095fb457e994926fc28c2a789471521fcc076cc4d8658', + 'Metadata': { + 'git': { + 'Author': { + 'Email': 'anfimovdan@gmail.com', + 'Name': 'Daniil Anfimov', + 'When': '2023-07-22T12:53:22+0200', + }, + 'Commit': 'c093e0f468c2810f76d4c09c340c58380bd965b1', + 'Committer': { + 'Email': 'anfimovdan@gmail.com', + 'Name': 'Daniil Anfimov', + 'When': '2023-07-22T12:53:22+0200', + }, + 'Message': 'Initial commit\n', + 'PGPSignature': '', + 'Parents': [], + 'Tree': '4526550b9c6b77e7e10f6e57dfecfe0504c5166f', + }, + 'sbom_api_ver': '0.2', + 'foo': 'bar', + }, + }, + 'timestamp': 1690794260, + 'verified': True, + 'refkey': None, + 'revision': 1, +} + +response = client.authenticate_git_repo("./immudb_wrapper_foobar/") +print(response) +{'error': 'Traceback (most recent call last):\n' + ' File "/code/env/bin/immudb_wrapper.py", line 247, in ' + 'verified_get\n' + ' self.verifiedGet(\n' + ' File "/code/env/lib/python3.9/site-packages/immudb/client.py", ' + 'line 667, in verifiedGet\n' + ' return verifiedGet.call(self._stub, self._rs, key, ' + 'verifying_key=self._vk, atRevision=atRevision)\n' + ' File ' + '"/code/env/lib/python3.9/site-packages/immudb/handler/verifiedGet.py", ' + 'line 30, in call\n' + ' ventry = service.VerifiableGet(req)\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 247, in __call__\n' + ' response, ignored_call = self._with_call(request,\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 290, in 
_with_call\n' + ' return call.result(), call\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 379, in result\n' + ' raise self\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 274, in continuation\n' + ' response, call = self._thunk(new_method).with_call(\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 1043, in with_call\n' + ' return _end_unary_response_blocking(state, call, True, None)\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 910, in _end_unary_response_blocking\n' + ' raise _InactiveRpcError(state) # pytype: ' + 'disable=not-instantiable\n' + 'grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that ' + 'terminated with:\n' + '\tstatus = StatusCode.UNKNOWN\n' + '\tdetails = "tbtree: key not found"\n' + '\tdebug_error_string = "UNKNOWN:Error received from peer ' + 'ipv4:172.18.0.2:3322 {grpc_message:"tbtree: key not found", ' + 'grpc_status:2, ' + 'created_time:"2023-07-31T09:13:55.947555151+00:00"}"\n' + '>\n'} +``` + +### File authentication + +This method calculates the file hash of the provided file and looks up the metadata of that hash in the database. Returns a dict with an error if metadata doesn't exist in the database. 
+ +```python3 +response = client.authenticate_file("./hello_world.sh") +print(response) +{ + 'id': 1, + 'key': '4db5767d4bf4221a5656b163ef1bae833095255f80d1ad5be21dfef84caf4126', + 'value': { + 'Name': 'hello_world.sh', + 'Kind': 'file', + 'Size': '2.62 KB', + 'Hash': '4db5767d4bf4221a5656b163ef1bae833095255f80d1ad5be21dfef84caf4126', + 'Metadata': { + 'sbom_api_ver': '0.2', + 'foo': 'bar', + }, + }, + 'timestamp': 1690794033, + 'verified': True, + 'refkey': None, + 'revision': 1, +} + +response = client.authenticate_file("./hello_world1.sh") +print(response) +{'error': 'Traceback (most recent call last):\n' + ' File "/code/env/bin/immudb_wrapper.py", line 247, in ' + 'verified_get\n' + ' self.verifiedGet(\n' + ' File "/code/env/lib/python3.9/site-packages/immudb/client.py", ' + 'line 667, in verifiedGet\n' + ' return verifiedGet.call(self._stub, self._rs, key, ' + 'verifying_key=self._vk, atRevision=atRevision)\n' + ' File ' + '"/code/env/lib/python3.9/site-packages/immudb/handler/verifiedGet.py", ' + 'line 30, in call\n' + ' ventry = service.VerifiableGet(req)\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 247, in __call__\n' + ' response, ignored_call = self._with_call(request,\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 290, in _with_call\n' + ' return call.result(), call\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 379, in result\n' + ' raise self\n' + ' File ' + '"/code/env/lib64/python3.9/site-packages/grpc/_interceptor.py", ' + 'line 274, in continuation\n' + ' response, call = self._thunk(new_method).with_call(\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 1043, in with_call\n' + ' return _end_unary_response_blocking(state, call, True, None)\n' + ' File "/code/env/lib64/python3.9/site-packages/grpc/_channel.py", ' + 'line 910, in _end_unary_response_blocking\n' + ' raise _InactiveRpcError(state) 
# pytype: ' + 'disable=not-instantiable\n' + 'grpc._channel._InactiveRpcError: <_InactiveRpcError of RPC that ' + 'terminated with:\n' + '\tstatus = StatusCode.UNKNOWN\n' + '\tdetails = "tbtree: key not found"\n' + '\tdebug_error_string = "UNKNOWN:Error received from peer ' + 'ipv4:172.18.0.2:3322 {grpc_message:"tbtree: key not found", ' + 'grpc_status:2, ' + 'created_time:"2023-07-31T09:13:55.947555151+00:00"}"\n' + '>\n'} +``` + +## Contribution + +If you wish to contribute to `immudb_wrapper`, just create a fork and make a PR. diff --git a/immudb_wrapper.py b/immudb_wrapper.py new file mode 100644 index 0000000..a6daf4c --- /dev/null +++ b/immudb_wrapper.py @@ -0,0 +1,381 @@ +import hashlib +import json +import logging +import os +import re +from dataclasses import asdict +from pathlib import Path +from traceback import format_exc +from typing import IO, Any, Dict, Optional, Union +from urllib.parse import urlparse + +from git import Repo +from grpc import RpcError +from immudb import ImmudbClient +from immudb.datatypes import SafeGetResponse +from immudb.rootService import RootService + +Dict = Dict[str, Any] + + +class ImmudbWrapper(ImmudbClient): + def __init__( + self, + username: str = 'immudb', + password: str = 'immudb', + database: str = 'defaultdb', + immudb_address: Optional[str] = 'localhost:3322', + root_service: Optional[RootService] = None, + public_key_file: Optional[str] = None, + timeout: Optional[int] = None, + max_grpc_message_length: Optional[int] = None, + logger: Optional[logging.Logger] = None, + ): + """ + A wrapper around the `immudb-py` SDK client from Codenotary. + + Args: + username (str): Immudb username to log in (default: "immudb"). + password (str): Immudb password to log in (default: "immudb"). + database (str): Immudb database to be used (default: "defaultdb"). + immudb_address (str, optional): url in format ``host:port`` + (e.g. ``localhost:3322``) of your immudb instance. + Defaults to ``localhost:3322`` when no value is set. 
+ root_service (RootService, optional): object that implements + RootService, allowing requests to be verified. Optional. + By default in-memory RootService instance will be created + public_key_file (str, optional): path of the public key to use + for authenticating requests. Optional. + timeout (int, optional): global timeout for GRPC requests. Requests + will hang until the server responds if no timeout is set. + max_grpc_message_length (int, optional): maximum size of message + the server should send. The default (4Mb) is used if no + value is set. + logger (logging.Logger, optional): Logger to use (default: root logger). + """ + self.username = username + self.password = password + self.database = database + # Keep a caller-supplied logger; otherwise fall back to the root logger. + self._logger = logger if logger is not None else logging.getLogger() + super().__init__( + immudUrl=immudb_address, + rs=root_service, + publicKeyFile=public_key_file, + timeout=timeout, + max_grpc_message_length=max_grpc_message_length, + ) + self.login( + username=self.username, + password=self.password, + ) + self.useDatabase(self.encode(self.database)) + + @classmethod + def get_version(cls) -> str: + return "0.1.0" + + def encode( + self, + value: Union[str, bytes, dict], + ) -> bytes: + if isinstance(value, str): + result = value.encode() + elif isinstance(value, bytes): + result = value + elif isinstance(value, dict): + result = json.dumps(value).encode() + else: + raise ValueError( + "Cannot encode value that isn't str, bytes or dict." 
+ ) + return result + + def to_dict( + self, + response: SafeGetResponse, + ) -> Dict: + result = asdict(response) + result['key'] = result['key'].decode() + result['value'] = json.loads(result['value'].decode()) + return result + + def get_size_format( + self, + value: int, + factor: int = 1024, + suffix: str = "B", + ) -> str: + """ + Scale bytes to its proper byte format + e.g: + 1253656 => '1.20 MB' + 1253656678 => '1.17 GB' + """ + for unit in [ + "", + "K", + "M", + "G", + "T", + "P", + "E", + "Z", + ]: + if value < factor: + return f"{value:.2f} {unit}{suffix}" + value /= factor + return f"{value:.2f} Y{suffix}" + + def get_directory_size(self, path: Union[str, os.PathLike]) -> int: + # Count regular files only: directory entries report a filesystem- + # dependent size of their own and dangling symlinks cannot be stat()ed. + files = (entry for entry in Path(path).rglob('*') if entry.is_file()) + return sum(entry.stat().st_size for entry in files) + + def get_file_size(self, file_path: Union[str, os.PathLike]) -> int: + return Path(file_path).stat().st_size + + def get_hasher(self, checksum_type: str = 'sha256'): + """ + Returns a corresponding hashlib hashing function for the specified + checksum type. + + Parameters + ---------- + checksum_type : str + Checksum type (e.g. sha1, sha256). + + Returns + ------- + hashlib._Hash + Hashlib hashing function. + """ + return hashlib.new(checksum_type) + + def hash_file( + self, + file_path: Union[str, IO], + hash_type: str = 'sha256', + buff_size: int = 1048576, + hasher=None, + ) -> str: + """ + Returns checksum (hexadecimal digest) of the file. + + Parameters + ---------- + file_path : str or file-like + File to hash. It could be either a path or a file descriptor. + hash_type : str + Hash type (e.g. sha1, sha256). + buff_size : int + Number of bytes to read at once. + hasher : hashlib._Hash + Any hash algorithm from hashlib. + + Returns + ------- + str + Checksum (hexadecimal digest) of the file. 
+ """ + if hasher is None: + hasher = self.get_hasher(hash_type) + + def feed_hasher(_fd): + buff = _fd.read(buff_size) + while len(buff): + if not isinstance(buff, bytes): + buff = buff.encode() + hasher.update(buff) + buff = _fd.read(buff_size) + + # Treat str and os.PathLike (e.g. pathlib.Path) as a path to open. + if isinstance(file_path, (str, os.PathLike)): + with open(file_path, "rb") as fd: + feed_hasher(fd) + else: + file_path.seek(0) + feed_hasher(file_path) + return hasher.hexdigest() + + def hash_content( + self, + content: Union[str, bytes], + ) -> str: + hasher = self.get_hasher() + if isinstance(content, str): + content = content.encode() + hasher.update(content) + return hasher.hexdigest() + + @staticmethod + def extract_git_metadata( + repo_path: Union[str, os.PathLike], + ) -> Dict: + with Repo(repo_path) as repo: + url = urlparse(repo.remote().url) + commit = repo.commit() + name = ( + f'git@{url.netloc}' + f'{re.sub(r"^/", ":", url.path)}' + f'@{commit.hexsha[:7]}' + ) + return { + 'Name': name, + 'git': { + 'Author': { + 'Email': commit.author.email, + 'Name': commit.author.name, + 'When': commit.authored_datetime.strftime( + '%Y-%m-%dT%H:%M:%S%z', + ), + }, + 'Commit': commit.hexsha, + 'Committer': { + 'Email': commit.committer.email, + 'Name': commit.committer.name, + 'When': commit.committed_datetime.strftime( + '%Y-%m-%dT%H:%M:%S%z', + ), + }, + 'Message': commit.message, + 'PGPSignature': commit.gpgsig, + 'Parents': [ + parent.hexsha for parent in commit.iter_parents() + ], + 'Tree': commit.tree.hexsha, + }, + } + + @property + def default_metadata(self) -> Dict: + return { + 'sbom_api_ver': '0.2', + } + + def verified_get( + self, + key: Union[str, bytes], + revision: Optional[int] = None, + ) -> Dict: + try: + return self.to_dict( + self.verifiedGet( + key=self.encode(key), + atRevision=revision, + ), + ) + except RpcError: + return {'error': format_exc()} + + def verified_set( + self, + key: Union[str, bytes], + value: Union[str, bytes, Dict], + ) -> Dict: + try: + return asdict( + self.verifiedSet( + 
+ key=self.encode(key), + value=self.encode(value), + ), + ) + except RpcError: + return {'error': format_exc()} + + def notarize( + self, + key: str, + value: Union[str, bytes, Dict], + ) -> Dict: + result = self.verified_set(key, value) + if 'error' in result: + return result + return self.verified_get(key) + + def notarize_file( + self, + file: str, + user_metadata: Optional[Dict] = None, + ) -> Dict: + """ + This method calculates the file hash and file size and inserts them + with the user's metadata (if provided), into the database. + """ + if not user_metadata: + user_metadata = {} + hash_file = self.hash_file(file) + payload = { + 'Name': Path(file).name, + 'Kind': 'file', + 'Size': self.get_size_format(self.get_file_size(file)), + 'Hash': hash_file, + 'Metadata': { + **self.default_metadata, + **user_metadata, + }, + } + return self.notarize( + key=hash_file, + value=payload, + ) + + def notarize_git_repo( + self, + repo_path: Union[str, os.PathLike], + user_metadata: Optional[Dict] = None, + ) -> Dict: + """ + This method extracts the git metadata from a provided git directory, + calculates the hash of the extracted metadata and inserts that + metadata with the user's metadata (if provided), into the database. + Fails with an `InvalidGitRepositoryError` when + given a non-git directory. 
+ """ + if not user_metadata: + user_metadata = {} + git_metadata = self.extract_git_metadata(repo_path) + metadata_hash = self.hash_content(json.dumps(git_metadata['git'])) + payload = { + 'Name': git_metadata['Name'], + 'Kind': 'git', + 'Size': self.get_size_format(self.get_directory_size(repo_path)), + 'Hash': metadata_hash, + 'Metadata': { + 'git': git_metadata['git'], + **self.default_metadata, + **user_metadata, + }, + } + return self.notarize( + key=metadata_hash, + value=payload, + ) + + def authenticate( + self, + key: Union[str, bytes], + ) -> Dict: + return self.verified_get(key) + + def authenticate_file(self, file: str) -> Dict: + """ + This method calculates the file hash of the provided file + and looks up the metadata of that hash in the database. + Returns a dict with an error if metadata doesn't exist in the database. + """ + return self.authenticate(self.hash_file(file)) + + def authenticate_git_repo( + self, + repo_path: Union[str, os.PathLike], + ) -> Dict: + """ + This method extracts the git metadata from a provided git directory, + calculates the hash of the extracted metadata, and looks up + the metadata of that hash in the database. + Returns a dict with an error if metadata doesn't exist in the database. 
+ """ + metadata_hash = self.hash_content( + json.dumps( + self.extract_git_metadata(repo_path)['git'], + ), + ) + return self.authenticate(metadata_hash) diff --git a/setup.py b/setup.py new file mode 100644 index 0000000..26029d9 --- /dev/null +++ b/setup.py @@ -0,0 +1,26 @@ +from setuptools import setup + +setup( + name='immudb_wrapper', + version='0.1.0', + author='Daniil Anfimov', + author_email='anfimovdan@gmail.com', + description='A wrapper around the `immudb-py` SDK client from Codenotary.', + url='https://git.almalinux.org/almalinux/immudb_wrapper', + project_urls={ + 'Bug Tracker': 'https://git.almalinux.org/almalinux/immudb_wrapper/issues', + }, + classifiers=[ + 'Programming Language :: Python :: 3', + 'License :: OSI Approved :: ' + 'Apache Software License', + 'Operating System :: OS Independent', + ], + py_modules=['immudb_wrapper'], + scripts=['immudb_wrapper.py'], + install_requires=[ + 'GitPython>=3.1.20', + 'immudb-py>=1.4.0' + ], + python_requires='>=3.7', +)