Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
98 changes: 98 additions & 0 deletions .circleci/config.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
version: 2

# Shared job settings. The `default_job` anchor is merged into every job with `<< : *default_job`.
job_options:
- &default_job
# All jobs run inside the cimg/python:3.11.13 Docker image.
docker:
- image: cimg/python:3.11.13

# Option maps for the cache steps, exposed as anchors and merged into the reusable steps.
step_options:
# pip cache lookup: keys are listed from most specific (arch + both checksums) to the bare prefix.
- &restore_pip_cache_options
keys:
- v3-pip-{{ arch }}-{{ checksum "pyproject.toml" }}-{{ checksum "requirements.txt" }}
- v3-pip-{{ arch }}-{{ checksum "pyproject.toml" }}
- v3-pip-{{ arch }}
- v3-pip-
# pip cache save: stored under the most specific key; covers ~/.local and ~/.cache.
- &save_pip_cache_options
key: v3-pip-{{ arch }}-{{ checksum "pyproject.toml" }}-{{ checksum "requirements.txt" }}
paths:
- ~/.local
- ~/.cache
# venv cache lookup: same key layout as the pip cache, with a `venv` prefix.
- &restore_venv_cache_options
keys:
- v3-venv-{{ arch }}-{{ checksum "pyproject.toml" }}-{{ checksum "requirements.txt" }}
- v3-venv-{{ arch }}-{{ checksum "pyproject.toml" }}
- v3-venv-{{ arch }}
- v3-venv-
# venv cache save: stores the project's `venv` directory.
- &save_venv_cache_options
key: v3-venv-{{ arch }}-{{ checksum "pyproject.toml" }}-{{ checksum "requirements.txt" }}
paths:
- /home/circleci/project/venv

# Reusable cache steps. Each one wraps restore_cache/save_cache around the matching
# option anchor so jobs can reference it as a single list item (e.g. `- *restore_pip_cache`).
steps:
- &restore_pip_cache
restore_cache:
<< : *restore_pip_cache_options
- &save_pip_cache
save_cache:
<< : *save_pip_cache_options
- &restore_venv_cache
restore_cache:
<< : *restore_venv_cache_options
- &save_venv_cache
save_cache:
<< : *save_venv_cache_options

jobs:
# Installs the development dependencies and saves the pip and venv caches.
prepare_cache:
<< : *default_job
steps:
- checkout
# Restore whatever caches exist before installing.
- *restore_pip_cache
- *restore_venv_cache
- run:
name: Install dependencies
command: make ci-dev-install
# Save both caches after the install step.
- *save_pip_cache
- *save_venv_cache

# Runs `make lint` after restoring the pip and venv caches.
lint:
<< : *default_job
steps:
- checkout
- *restore_pip_cache
- *restore_venv_cache
- run:
name: Run the linter
command: make lint
# Publishes whatever result files exist under build/test.
- store_test_results:
path: build/test

# Runs the test suite after restoring the caches, then publishes the test results
# and the coverage report.
test:
<< : *default_job
steps:
- checkout
- *restore_pip_cache
- *restore_venv_cache
- run:
name: Run the tests
command: make test-only
- store_test_results:
path: build/test
# Keep the XML coverage report as a build artifact under `coverage`.
- store_artifacts:
path: build/coverage/coverage.xml
destination: coverage
# Best-effort upload: the `|| echo ...` fallback keeps a failed upload from failing the job.
# NOTE(review): this downloads and runs Codecov's bash uploader, which Codecov has
# announced as deprecated - confirm whether to migrate to the current uploader.
- run:
name: Upload coverage to Codecov
command: bash <(curl -s https://codecov.io/bash) -t ${CODECOV_TOKEN} -f build/coverage/coverage.xml || echo "Codecov did not collect coverage reports"

# Single workflow: `prepare_cache` runs first; `lint` and `test` both require it
# and do not depend on each other.
workflows:
version: 2
test:
jobs:
- prepare_cache
- lint:
requires:
- prepare_cache
- test:
requires:
- prepare_cache
2 changes: 1 addition & 1 deletion .python-version
Original file line number Diff line number Diff line change
@@ -1 +1 @@
3.6.8
3.11.13
4 changes: 2 additions & 2 deletions .travis.yml
Original file line number Diff line number Diff line change
@@ -1,9 +1,9 @@
language: python
python:
- "3.6"
- "3.11"
# command to install dependencies
install:
- make dev
# command to run tests
script:
- make tests
- make test
74 changes: 54 additions & 20 deletions Makefile
Original file line number Diff line number Diff line change
@@ -1,44 +1,75 @@
# Some simple testing tasks (sorry, UNIX only).

PYTHON=venv/bin/python3
PYTHON=venv/bin/python
PIP=venv/bin/pip
NOSE=venv/bin/nosetests
NOSE=venv/bin/pynose
FLAKE=venv/bin/flake8
PYPICLOUD_HOST=pypicloud.getkeepsafe.local
PIP_ARGS=--extra-index=http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
PIP_ARGS=--extra-index-url http://$(PYPICLOUD_HOST)/simple/ --trusted-host $(PYPICLOUD_HOST)
TWINE=./venv/bin/twine
# Flags passed to every test run: coverage options for the `validator` package
# and the `tests` path to collect from.
PYNOSE_SHARED_FLAGS=-s --with-coverage --cover-inclusive --cover-erase --cover-package=validator tests
PYNOSE_FLAGS=$(PYNOSE_SHARED_FLAGS)
# When the CI variable is set (non-empty), also write XML coverage and xunit
# reports under build/.
ifdef CI
PYNOSE_FLAGS += --cover-xml --cover-xml-file=build/coverage/coverage.xml --with-xunit --xunit-file=build/test/results.xml
endif
# Extra arguments appended to the test commands; empty by default and
# overridable on the command line (make test-only FLAGS=...).
FLAGS=

update:
$(PIP) install -U pip
$(PIP) install $(PIP_ARGS) -U .
# Create the output directories for test results (build/test) and coverage
# reports (build/coverage); `mkdir -p` makes this safe to repeat.
build-dir:
mkdir -p build/test build/coverage

env:
test -d venv || python3 -m venv venv
test -d venv || python3.11 -m venv venv
$(PIP) install -U pip setuptools wheel
$(PIP) install $(PIP_ARGS) -e .

dev: env update
$(PIP) install $(PIP_ARGS) .[tests,devtools]
dev: env
$(PIP) install $(PIP_ARGS) -e '.[dev]'

install: env update
install: env

publish:
# Build the package into a freshly emptied dist/ and upload the signed wheel(s)
# to the PyPICloud host. Depends on `dev`, which installs the '.[dev]' extras first.
publish: dev
rm -rf dist
$(PYTHON) -m build .
$(TWINE) upload --verbose --sign --username developer --repository-url http://$(PYPICLOUD_HOST)/simple/ dist/*.whl

# Run flake8 over the `validator` package and the `tests` directory.
flake:
$(FLAKE) validator tests

test: flake
$(NOSE) -s $(FLAGS)
# No-op target: the recipe only runs `true`, silently.
# NOTE(review): presumably kept as a placeholder so `lint` has a stable prerequisite - confirm.
check-msgpack:
@true

# Aggregate lint target: ensures the build directories exist, then runs flake8
# and the check-msgpack target. It has no recipe of its own.
lint: build-dir flake check-msgpack

# Run the test suite without linting. Uses PYNOSE_FLAGS plus any caller-supplied FLAGS.
test-only: build-dir
$(NOSE) $(PYNOSE_FLAGS) $(FLAGS)

vtest:
$(NOSE) -s -v $(FLAGS)
# Full check: lint plus the test suite. It has no recipe of its own.
test: lint test-only

cov cover coverage:
$(NOSE) -s --with-cover --cover-html --cover-html-dir ./coverage $(FLAGS)
# Same as test-only but with -v added; `vtest` and `vtests` are aliases.
vtest vtests: build-dir
$(NOSE) -v $(PYNOSE_FLAGS) $(FLAGS)

# Run the tests and write an HTML coverage report to ./coverage, then print the
# file:// URL of the report. `cov`, `cover` and `coverage` are aliases.
cov cover coverage: build-dir
$(NOSE) $(PYNOSE_FLAGS) --cover-html --cover-html-dir ./coverage $(FLAGS)
echo "open file://`pwd`/coverage/index.html"

# Prepare the virtualenv used by CI.
# A `venv` directory whose pip still runs is reused as-is; anything else is
# removed and rebuilt with python3.11, then pip/setuptools/wheel are upgraded.
# The rebuild commands are chained with && so the first failure aborts the
# recipe instead of letting later commands run against a missing or
# half-built venv. `rm -rf` already succeeds when venv is absent, so no
# separate existence test is needed.
ci-env:
	@if [ -d "venv" ] && $(PIP) --version >/dev/null 2>&1; then \
		echo "Reusing cached CI venv, no need to recreate when it hasn't changed"; \
	else \
		echo "No cached venv found, creating fresh venv..." && \
		rm -rf venv && \
		python3.11 -m venv venv && \
		$(PIP) install -U pip setuptools wheel; \
	fi

# CI install entry point: after ci-env has prepared the venv, install the
# project in editable mode with its `dev` extras.
ci-dev-install: ci-env
$(PIP) install $(PIP_ARGS) -e '.[dev]'

# Install the repository's pre-push hook: copy git_hooks/pre-push to the path
# git reports for hooks/pre-push and make it executable.
hooks:
cp git_hooks/pre-push `git rev-parse --git-path hooks/pre-push`
chmod +x `git rev-parse --git-path hooks/pre-push`

# Remove the installed pre-push hook; `rm -f` makes this a no-op when none is installed.
unhooks:
rm -f `git rev-parse --git-path hooks/pre-push`

clean:
rm -rf `find . -name __pycache__`
rm -f `find . -type f -name '*.py[co]' `
Expand All @@ -51,7 +82,10 @@ clean:
rm -f .coverage
rm -rf coverage
rm -rf build
rm -rf dist
rm -rf *.egg-info
rm -rf venv


.PHONY: all build env linux run pep test vtest testloop cov clean
# These targets are commands, not files; declaring them phony keeps a
# same-named file from making them look up to date.
.PHONY: build-dir env dev install publish flake check-msgpack lint test-only test vtest vtests cov cover coverage ci-env \
ci-dev-install hooks unhooks clean
79 changes: 49 additions & 30 deletions README.md
Original file line number Diff line number Diff line change
@@ -1,11 +1,11 @@
content-validator [![Build Status](https://travis-ci.org/KeepSafe/content-validator.svg?branch=master)](https://travis-ci.org/KeepSafe/content-validator)
content-validator [![Build Status](https://travis-ci.org/KeepSafe/content-validator.svg?branch=master)](https://travis-ci.org/KeepSafe/content-validator) [![CircleCI](https://circleci.com/gh/KeepSafe/content-validator.svg?style=svg)](https://circleci.com/gh/KeepSafe/content-validator)
=================

Content validator looks at text content and performs different validation tasks.

## Requirements

1. Python 3.6.6+
1. Python 3.11

## Installation

Expand All @@ -16,16 +16,23 @@ Content validator looks at text content and preforms different validation tasks.
`make env`
`make dev`

Common local commands:

* `make env` - create `venv` and install the package runtime dependencies.
* `make dev` - install runtime and development/test dependencies.
* `make lint` - run flake8 using `pyproject.toml` configuration.
* `make test` - run lint and the test suite.
* `make coverage` - run tests with coverage and write the HTML report to `coverage/`.
* `make clean` - remove local build, coverage, cache, and virtualenv artifacts.

## Usage

Generally it's easiest to write a separate test for each validation case. The simplest example:

```
f = files('src/**/*.txt')
parser = create_parser(Filetype.txt)
reporter = ConsoleReporter()
check = urls(Filetype.txt)
result = validator.validate(checks=[check], files=f, parser=parser, reporter=reporter)
import validator

result = validator.parse().files('src/**/*.txt').check().url().validate()
self.assertEqual([], result)
```

Expand All @@ -41,34 +48,41 @@ In case you are not doing any comparison checks you can use a usual glob like pa

When the file is first read, the data you want to validate needs to be extracted from it. The simplest example is a text file: nothing is done here except reading the file content. A more complex example is when, for example, you have markdown embedded in an xml tag. To extract the data you should create a chain of parsers. First you want to extract all tags from the xml. Second you want to parse the content of the tags from markdown to html. Here is an example of how to do that:

`chain_parsers([Filetype.xml, Filetype.md], query='//strings')`
`validator.parse().files('src/{lang}/*.xml', lang='en').xml(query='.//string').md().check().md().validate()`

The xml parser takes additional parameter `query` used to extract the tags. You can pass it to `create_parser` in the same way:
The xml parser takes an additional parameter `query` used to extract the tags:

`create_parser(Filetype.xml, query='//strings')`
`validator.parse().files('src/{lang}/*.xml', lang='en').xml(query='.//string')`

Available parsers types:

* `Filetype.txt` - simply reads the file
* `Filetype.md` - converts markdown to
* `Filetype.xml` - extracts text from xml and concatenates
* `Filetype.csv` - puts every value on a separate line
* `files(...).check()` - simply reads the file
* `.md()` - converts markdown to HTML
* `.xml(query='*')` - extracts text from XML and concatenates matching elements
* `.csv()` - puts every value on a separate line

### Reporters

Shows the result of the validation. There are 2 reporters available:

* `HtmlReporter` - creates an error file for every error
* `ConsoleReporter` - print the error to the console
* `ConsoleReporter` - prints the error to the console

### Checks

Checks perform validation on the content. Wether it's url or structure or anything else. If the content in not valid the check will return an error which later can be passed to a reporter.
Checks perform validation on the content, whether it's a url, structure, or anything else. If the content is not valid, the
check will return an error which can later be passed to a reporter.

Available checks:

* `urls(filetype, skip_images=False)` - validates if the url is accessible
* `markdown()` - validates markdown structure by comparing it with the base
* `.url(skip_images=False)` - validates if the url is accessible
* `.md()` - validates markdown structure by comparing it with the base
* `.java()` - validates Java placeholder/reference compatibility

## CLI

The package exposes a `content-validator` command. The current CLI is intentionally minimal; use `content-validator --help`
or `content-validator --version` for smoke checks, and use the Python API for validations.

## Example

Expand All @@ -78,18 +92,23 @@ A more detailed example looks like this:
class TestEmail(TestCase):

def test_email(self):
f = files('src/{lang}/*.xml', lang='en')
parser = create_parser(Filetype.xml, query='.//string')
reporter = HtmlReporter()
md = markdown()
result = validator.validate(checks=[md], files=f, parser=parser, reporter=reporter)
self.assertEqual({}, v.validate())
result = validator \
.parse() \
.files('src/{lang}/*.xml', lang='en') \
.xml(query='.//string') \
.check() \
.md() \
.validate()
self.assertEqual([], result)

def test_urls(self):
f = files('src/{lang}/*.xml', lang='en')
parser = chain_parsers([Filetype.xml, Filetype.md], query='.//string')
reporter = ConsoleReporter()
check = urls(Filetype.html, skip_images=True)
result = Validator(checks=[check], files=f, parser=parser, reporter=reporter)
self.assertEqual({}, v.validate())
result = validator \
.parse() \
.files('src/{lang}/*.xml', lang='en') \
.xml(query='.//string') \
.md() \
.check() \
.url(skip_images=True) \
.validate()
self.assertEqual([], result)
```
Loading