Recipes
When using with Faker
When using with Faker, there are two ways of using the providers.
Imports and initializations
Recommended way
from faker import Faker
from faker_file.providers.txt_file import TxtFileProvider
FAKER = Faker()
FAKER.add_provider(TxtFileProvider)
# Usage example
txt_file = FAKER.txt_file(content="Lorem ipsum")
See the full example
here
But this works too
from faker import Faker
from faker_file.providers.txt_file import TxtFileProvider
FAKER = Faker()
# Usage example
txt_file = TxtFileProvider(FAKER).txt_file(content="Lorem ipsum")
See the full example
here
Throughout the documentation we will be mixing these approaches.
Create a TXT file with static content
Content of the file is
Lorem ipsum
.
txt_file = FAKER.txt_file(content="Lorem ipsum")
See the full example
here
Create a DOCX file with dynamically generated content
Content is generated dynamically.
Content is limited to 1024 chars.
Wrap lines after 80 chars.
Prefix the filename with
zzz
.
docx_file = FAKER.docx_file(
prefix="zzz",
max_nb_chars=1_024,
wrap_chars_after=80,
)
See the full example
here
Create a ZIP file consisting of TXT files with static content
5 TXT files in the ZIP archive (default value is 5).
Content of all files is
Lorem ipsum
.
zip_file = FAKER.zip_file(
options={"create_inner_file_args": {"content": "Lorem ipsum"}}
)
See the full example
here
Create a ZIP file consisting of 3 DOCX files with dynamically generated content
3 DOCX files in the ZIP archive.
Content is generated dynamically.
Content is limited to 1024 chars.
Prefix the filenames in archive with xxx_.
Prefix the filename of the archive itself with zzz.
Inside the ZIP, put all files in directory yyy.
from faker_file.providers.helpers.inner import create_inner_docx_file
zip_file = FAKER.zip_file(
prefix="zzz",
options={
"count": 3,
"create_inner_file_func": create_inner_docx_file,
"create_inner_file_args": {
"prefix": "xxx_",
"max_nb_chars": 1_024,
},
"directory": "yyy",
},
)
See the full example
here
Create a ZIP file of 9 DOCX files with content generated from template
9 DOCX files in the ZIP archive.
Content is generated dynamically from given template.
from faker_file.providers.helpers.inner import create_inner_docx_file
TEMPLATE = "Hey {{name}},\n{{text}},\nBest regards\n{{name}}"
zip_file = FAKER.zip_file(
options={
"count": 9,
"create_inner_file_func": create_inner_docx_file,
"create_inner_file_args": {
"content": TEMPLATE,
},
}
)
See the full example
here
Create a nested ZIP file
Create a ZIP file which contains 5 ZIP files which contain 5 ZIP files which contain 5 DOCX files.
5 ZIP files in the ZIP archive.
Content is generated dynamically.
Prefix the filenames in archive with nested_level_1_.
Prefix the filename of the archive itself with nested_level_0_.
Each of the ZIP files inside the ZIP file in their turn contains 5 other ZIP files, prefixed with nested_level_2_, which in their turn contain 5 DOCX files.
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_zip_file,
)
zip_file = FAKER.zip_file(
prefix="nested_level_0_",
options={
"create_inner_file_func": create_inner_zip_file,
"create_inner_file_args": {
"prefix": "nested_level_1_",
"options": {
"create_inner_file_func": create_inner_zip_file,
"create_inner_file_args": {
"prefix": "nested_level_2_",
"options": {
"create_inner_file_func": create_inner_docx_file,
},
},
},
},
},
)
See the full example
here
Create a ZIP file with variety of different file types within
50 files in the ZIP archive (limited to DOCX, EPUB and TXT types).
Content is generated dynamically.
Prefix the filename of the archive itself with zzz_archive_.
Inside the ZIP, put all files in directory zzz.
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_epub_file,
create_inner_txt_file,
fuzzy_choice_create_inner_file,
)
from faker_file.storages.filesystem import FileSystemStorage
STORAGE = FileSystemStorage()
kwargs = {"storage": STORAGE, "generator": FAKER}
zip_file = FAKER.zip_file(
prefix="zzz_archive_",
options={
"count": 50,
"create_inner_file_func": fuzzy_choice_create_inner_file,
"create_inner_file_args": {
"func_choices": [
(create_inner_docx_file, kwargs),
(create_inner_epub_file, kwargs),
(create_inner_txt_file, kwargs),
],
},
"directory": "zzz",
},
)
See the full example
here
Another way to create a ZIP file with variety of different file types within
3 files in the ZIP archive (1 DOCX, and 2 XML types).
Content is generated dynamically.
Filename of the archive itself is alice-looking-through-the-glass.zip.
Files inside the archive have fixed names (passed with the basename argument).
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_xml_file,
list_create_inner_file,
)
zip_file = FAKER.zip_file(
basename="alice-looking-through-the-glass",
options={
"create_inner_file_func": list_create_inner_file,
"create_inner_file_args": {
"func_list": [
(create_inner_docx_file, {"basename": "doc"}),
(create_inner_xml_file, {"basename": "doc_metadata"}),
(create_inner_xml_file, {"basename": "doc_isbn"}),
],
},
},
)
See the full example
here
Note that the count argument (not shown in the example, but commonly
accepted by inner functions) will simply be ignored here.
Create an EML file consisting of TXT files with static content
5 TXT files in the EML email (default value is 5).
Content of all files is
Lorem ipsum
.
from faker_file.providers.eml_file import EmlFileProvider
FAKER.add_provider(EmlFileProvider)
eml_file = FAKER.eml_file(
options={"create_inner_file_args": {"content": "Lorem ipsum"}}
)
See the full example
here
Create an EML file consisting of 3 DOCX files with dynamically generated content
3 DOCX files in the EML email.
Content is generated dynamically.
Content is limited to 1024 chars.
Prefix the filenames in email with xxx_.
Prefix the filename of the email itself with zzz.
from faker_file.providers.helpers.inner import create_inner_docx_file
eml_file = FAKER.eml_file(
prefix="zzz",
options={
"count": 3,
"create_inner_file_func": create_inner_docx_file,
"create_inner_file_args": {
"prefix": "xxx_",
"max_nb_chars": 1_024,
},
},
)
See the full example
here
Create a nested EML file
Create an EML file which contains 5 EML files which contain 5 EML files which contain 5 DOCX files.
5 EML files in the EML file.
Content is generated dynamically.
Prefix the filenames in EML email with nested_level_1_.
Prefix the filename of the EML email itself with nested_level_0_.
Each of the EML files inside the EML file in their turn contains 5 other EML files, prefixed with nested_level_2_, which in their turn contain 5 DOCX files.
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_eml_file,
)
eml_file = FAKER.eml_file(
prefix="nested_level_0_",
options={
"create_inner_file_func": create_inner_eml_file,
"create_inner_file_args": {
"prefix": "nested_level_1_",
"options": {
"create_inner_file_func": create_inner_eml_file,
"create_inner_file_args": {
"prefix": "nested_level_2_",
"options": {
"create_inner_file_func": create_inner_docx_file,
},
},
},
},
},
)
See the full example
here
Create an EML file with variety of different file types within
10 files in the EML file (limited to DOCX, EPUB and TXT types).
Content is generated dynamically.
Prefix the filename of the EML itself with
zzz
.
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_epub_file,
create_inner_txt_file,
fuzzy_choice_create_inner_file,
)
eml_file = FAKER.eml_file(
prefix="zzz",
options={
"count": 10,
"create_inner_file_func": fuzzy_choice_create_inner_file,
"create_inner_file_args": {
"func_choices": [
(create_inner_docx_file, kwargs),
(create_inner_epub_file, kwargs),
(create_inner_txt_file, kwargs),
],
},
},
)
See the full example
here
Create a PDF file with predefined template containing dynamic fixtures
Content template is predefined and contains dynamic fixtures.
Wrap lines after 80 chars.
from faker_file.providers.pdf_file import PdfFileProvider
FAKER.add_provider(PdfFileProvider)
TEMPLATE = """
{{date}} {{city}}, {{country}}
Hello {{name}},
{{text}} {{text}} {{text}}
{{text}} {{text}} {{text}}
{{text}} {{text}} {{text}}
Address: {{address}}
Best regards,
{{name}}
{{address}}
{{phone_number}}
"""
pdf_file = FAKER.pdf_file(content=TEMPLATE, wrap_chars_after=80)
See the full example
here
Create a DOCX file with table and image using DynamicTemplate
When pre-defined templating and dynamic fixtures are not enough and
full control is needed, you can use DynamicTemplate
wrapper.
It takes a list of content modifiers
(tuples): (func: Callable, kwargs: dict)
. Each callable should accept
the following arguments:
provider: Faker Generator instance or Faker instance.
document: Document instance. Implementation specific.
data: Dictionary. Used primarily for observability.
counter: Integer. Index number of the content modifier.
**kwargs: Dictionary. Useful to pass implementation-specific arguments.
The following example shows how to generate a DOCX file with paragraph, table and image.
from faker_file.base import DynamicTemplate
from faker_file.contrib.docx_file import (
add_page_break,
add_paragraph,
add_picture,
add_table,
)
# Create a DOCX file with paragraph, picture, table and manual page breaks
# in between the mentioned elements. The ``DynamicTemplate`` simply
# accepts a list of callables (such as ``add_paragraph``,
# ``add_page_break``) and dictionary to be later on fed to the callables
# as keyword arguments for customising the default values.
docx_file = FAKER.docx_file(
content=DynamicTemplate(
[
(add_paragraph, {}), # Add paragraph
(add_page_break, {}), # Add page break
(add_picture, {}), # Add picture
(add_page_break, {}), # Add page break
(add_table, {}), # Add table
(add_page_break, {}), # Add page break
]
)
)
See the full example
here
Create an ODT file with table and image using DynamicTemplate
Similarly to previous section, the following example shows how to generate an ODT file with table and image.
from faker_file.contrib.odt_file import (
add_page_break,
add_paragraph,
add_picture,
add_table,
)
from faker_file.providers.odt_file import OdtFileProvider
FAKER.add_provider(OdtFileProvider) # Register OdtFileProvider
# Create a ODT file with paragraph, picture, table and manual page breaks
# in between the mentioned elements. The ``DynamicTemplate`` simply
# accepts a list of callables (such as ``add_paragraph``,
# ``add_page_break``) and dictionary to be later on fed to the callables
# as keyword arguments for customising the default values.
odt_file = FAKER.odt_file(
content=DynamicTemplate(
[
(add_paragraph, {}), # Add paragraph
(add_page_break, {}), # Add page break
(add_picture, {}), # Add picture
(add_page_break, {}), # Add page break
(add_table, {}), # Add table
(add_page_break, {}), # Add page break
]
)
)
See the full example
here
Create a PDF using reportlab generator
from faker_file.providers.pdf_file.generators.reportlab_generator import (
ReportlabPdfGenerator,
)
pdf_file = FAKER.pdf_file(pdf_generator_cls=ReportlabPdfGenerator)
See the full example
here
Create a PDF using pdfkit generator
Note, that at the moment, pdfkit
is the default generator. However,
you could set it explicitly as follows:
from faker_file.providers.pdf_file.generators.pdfkit_generator import (
PdfkitPdfGenerator,
)
pdf_file = FAKER.pdf_file(pdf_generator_cls=PdfkitPdfGenerator)
See the full example
here
Create a graphic PDF file using Pillow
A graphic PDF file does not contain text. Don’t use it when you need text-based content. However, sometimes you just need a valid file in PDF format, without caring much about the content. That’s where GraphicPdfFileProvider comes to the rescue:
from faker_file.providers.pdf_file import GraphicPdfFileProvider
FAKER.add_provider(GraphicPdfFileProvider)
pdf_file = FAKER.graphic_pdf_file()
See the full example
here
The generated file will contain a random graphic (consisting of lines and
shapes of different colours). One of the most useful arguments supported is
size
.
pdf_file = FAKER.graphic_pdf_file(size=(800, 800))
See the full example
here
Graphic providers
Files generated by graphic file providers do not contain text. Don’t use them when you need text-based content. However, sometimes you just need a valid image file with graphics of a certain size. That’s where graphic file providers help.
Supported file formats are: BMP, GIF, ICO, JPEG, PDF, PNG, SVG, TIFF and WEBP.
Create an ICO file
from faker_file.providers.ico_file import GraphicIcoFileProvider
FAKER.add_provider(GraphicIcoFileProvider)
ico_file = FAKER.graphic_ico_file(size=(800, 800))
See the full example
here
Create a JPEG file
from faker_file.providers.jpeg_file import GraphicJpegFileProvider
FAKER.add_provider(GraphicJpegFileProvider)
jpeg_file = FAKER.graphic_jpeg_file(size=(800, 800))
See the full example
here
Create a PNG file
from faker_file.providers.png_file import GraphicPngFileProvider
FAKER.add_provider(GraphicPngFileProvider)
png_file = FAKER.graphic_png_file(size=(800, 800))
See the full example
here
Create a WEBP file
from faker_file.providers.webp_file import GraphicWebpFileProvider
FAKER.add_provider(GraphicWebpFileProvider)
webp_file = FAKER.graphic_webp_file(size=(800, 800))
See the full example
here
Create an MP3 file
from faker_file.providers.mp3_file import Mp3FileProvider
FAKER.add_provider(Mp3FileProvider)
mp3_file = FAKER.mp3_file()
See the full example
here
Create an MP3 file by explicitly specifying MP3 generator class
Google Text-to-Speech
from faker_file.providers.mp3_file.generators.gtts_generator import (
GttsMp3Generator,
)
mp3_file = FAKER.mp3_file(mp3_generator_cls=GttsMp3Generator)
See the full example
here
You can tune arguments too:
mp3_file = FAKER.mp3_file(
mp3_generator_cls=GttsMp3Generator,
mp3_generator_kwargs={
"lang": "en",
"tld": "co.uk",
},
)
See the full example
here
Refer to https://gtts.readthedocs.io/en/latest/module.html#languages-gtts-lang
for list of accepted values for lang
argument.
Refer to https://gtts.readthedocs.io/en/latest/module.html#localized-accents
for list of accepted values for tld
argument.
Microsoft Edge Text-to-Speech
from faker_file.providers.mp3_file.generators.edge_tts_generator import (
EdgeTtsMp3Generator,
)
mp3_file = FAKER.mp3_file(mp3_generator_cls=EdgeTtsMp3Generator)
See the full example
here
You can tune arguments too:
mp3_file = FAKER.mp3_file(
mp3_generator_cls=EdgeTtsMp3Generator,
mp3_generator_kwargs={
"voice": "en-GB-LibbyNeural",
},
)
See the full example
here
Run edge-tts -l
from terminal for list of available voices.
Create an MP3 file with custom MP3 generator
The default MP3 generator class is GttsMp3Generator, which uses Google
Text-to-Speech services to generate an MP3 file from given or
randomly generated text. It does not require additional services to
run and the only dependency here is the gtts package. You can
however implement your own custom MP3 generator class and pass it to
the mp3_file method in the mp3_generator_cls argument instead of the
default GttsMp3Generator. Read about quotas of Google Text-to-Speech
services here.
Usage with custom MP3 generator class.
from faker_file.providers.base.mp3_generator import BaseMp3Generator
from marytts import MaryTTS # Imaginary `marytts` Python library
# Define custom MP3 generator
class MaryTtsMp3Generator(BaseMp3Generator):
locale: str = "cmu-rms-hsmm"
voice: str = "en_US"
def handle_kwargs(self, **kwargs) -> None:
# Since it's impossible to unify all TTS systems it's allowed
# to pass arbitrary arguments to the `BaseMp3Generator`
# constructor. Each implementation class contains its own
# additional tuning arguments. Check the source code of the
# implemented MP3 generators as an example.
if "locale" in kwargs:
self.locale = kwargs["locale"]
if "voice" in kwargs:
self.voice = kwargs["voice"]
def generate(self) -> bytes:
# Your implementation here. Note, that `self.content`
# in this context is the text to make MP3 from.
# `self.generator` would be the `Faker` or `Generator`
# instance from which you could extract information on
# active locale.
# What comes below is pseudo implementation.
mary_tts = MaryTTS(locale=self.locale, voice=self.voice)
return mary_tts.synth_mp3(self.content)
# Generate MP3 file from random text
mp3_file = FAKER.mp3_file(
mp3_generator_cls=MaryTtsMp3Generator,
)
See the full example
here
See exact implementation of marytts_mp3_generator in the examples.
Pick a random file from a directory given
Create an exact copy of the randomly picked file under a different name.
Prefix of the destination file would be zzz.
source_dir_path is the absolute path to the directory to pick files from.
from faker_file.providers.random_file_from_dir import RandomFileFromDirProvider
FAKER.add_provider(RandomFileFromDirProvider)
# We assume that directory "/tmp/tmp/" exists and contains files.
random_file = FAKER.random_file_from_dir(
source_dir_path="/tmp/tmp/",
prefix="zzz",
)
See the full example
here
File from path given
Create an exact copy of a file under a different name.
Prefix of the destination file would be zzz.
path is the absolute path to the file to copy.
from faker_file.providers.file_from_path import FileFromPathProvider
FAKER.add_provider(FileFromPathProvider)
# We assume that the file "/tmp/tmp/file.docx" exists.
docx_file = FAKER.file_from_path(
path="/tmp/tmp/file.docx",
prefix="zzz",
)
See the full example
here
Generate a file of a certain size
The only two file types for which it is easy to foresee the file size are BIN and TXT. Note, that size of BIN files is always exact, while for TXT it is approximate.
BIN
from faker_file.providers.bin_file import BinFileProvider
FAKER.add_provider(BinFileProvider)
bin_file = FAKER.bin_file(length=1024**2) # 1 Mb
bin_file = FAKER.bin_file(length=3 * 1024**2) # 3 Mb
bin_file = FAKER.bin_file(length=10 * 1024**2) # 10 Mb
bin_file = FAKER.bin_file(length=1024) # 1 Kb
bin_file = FAKER.bin_file(length=3 * 1024) # 3 Kb
bin_file = FAKER.bin_file(length=10 * 1024) # 10 Kb
See the full example
here
TXT
from faker_file.providers.txt_file import TxtFileProvider
FAKER.add_provider(TxtFileProvider)
txt_file = FAKER.txt_file(max_nb_chars=1024**2) # 1 Mb
txt_file = FAKER.txt_file(max_nb_chars=3 * 1024**2) # 3 Mb
txt_file = FAKER.txt_file(max_nb_chars=10 * 1024**2) # 10 Mb
txt_file = FAKER.txt_file(max_nb_chars=1024) # 1 Kb
txt_file = FAKER.txt_file(max_nb_chars=3 * 1024) # 3 Kb
txt_file = FAKER.txt_file(max_nb_chars=10 * 1024) # 10 Kb
See the full example
here
Generate files using multiprocessing
Generate 10 DOCX files
Use template.
Generate 10 DOCX files.
from multiprocessing import Pool
from faker_file.providers.helpers.inner import create_inner_docx_file
from faker_file.storages.filesystem import FileSystemStorage
STORAGE = FileSystemStorage()
# Document template
TEMPLATE = "Hey {{name}},\n{{text}},\nBest regards\n{{name}}"
with Pool(processes=2) as pool:
for _ in range(10): # Number of times we want to run our function
pool.apply_async(
create_inner_docx_file,
# Apply async doesn't support kwargs. We have to pass all
# arguments.
[STORAGE, "mp", FAKER, None, None, TEMPLATE],
)
pool.close()
pool.join()
See the full example
here
Randomize the file format
from faker_file.providers.helpers.inner import (
create_inner_docx_file,
create_inner_epub_file,
create_inner_pdf_file,
create_inner_txt_file,
fuzzy_choice_create_inner_file,
)
kwargs = {"storage": STORAGE, "generator": FAKER, "content": TEMPLATE}
with Pool(processes=2) as pool:
for _ in range(10): # Number of times we want to run our function
pool.apply_async(
fuzzy_choice_create_inner_file,
[
[
(create_inner_docx_file, kwargs),
(create_inner_epub_file, kwargs),
(create_inner_pdf_file, kwargs),
(create_inner_txt_file, kwargs),
]
],
)
pool.close()
pool.join()
See the full example
here
Generating files from existing documents using NLP augmentation
See the following example:
from faker_file.providers.augment_file_from_dir import (
AugmentFileFromDirProvider,
)
FAKER.add_provider(AugmentFileFromDirProvider)
# We assume that directory "/tmp/tmp/" exists and contains
# files of `DOCX`, `EML`, `EPUB`, `ODT`, `PDF`, `RTF` or `TXT`
# formats.
augmented_file = FAKER.augment_file_from_dir(
source_dir_path="/tmp/tmp/",
)
See the full example
here
The generated file will resemble the text of the original document, but
will not be the same. This is useful when you don’t want to
test on text generated by Faker, but rather on something that
makes more sense for your use case, while still ensuring
uniqueness of the documents.
The following file types are supported:
DOCX
EML
EPUB
ODT
PDF
RTF
TXT
By default, all supported files are eligible for random selection. You could
however narrow that list by providing extensions
argument:
# We assume that directory "/tmp/tmp/" exists and contains
# files of `DOCX` and `ODT` formats.
augmented_file = FAKER.augment_file_from_dir(
source_dir_path="/tmp/tmp/",
extensions={"docx", "odt"}, # Pick only DOCX or ODT
)
See the full example
here
Actual augmentation of texts is delegated to an abstraction layer of text augmenters. Currently, two augmenters are implemented. The default one is based on textaugment (which is in turn based on nltk); it is very lightweight and speedy, but produces less accurate results. The other one is based on nlpaug, which is way more sophisticated, but at the cost of speed.
nlpaug augmenter
By default bert-base-multilingual-cased
model is used, which is
pretrained on the top 104 languages with the largest Wikipedia using a
masked language modeling (MLM) objective. If you want to use a different
model, specify the proper identifier in the model_path
argument.
Some well working options for model_path
are:
bert-base-multilingual-cased
bert-base-multilingual-uncased
bert-base-cased
bert-base-uncased
bert-base-german-cased
GroNLP/bert-base-dutch-cased
from faker_file.providers.augment_file_from_dir.augmenters import (
nlpaug_augmenter,
)
# We assume that directory "/tmp/tmp/" exists and contains
# files of `DOCX`, `EML`, `EPUB`, `ODT`, `PDF`, `RTF` or `TXT`
# formats.
augmented_file = FAKER.augment_file_from_dir(
source_dir_path="/tmp/tmp/",
text_augmenter_cls=nlpaug_augmenter.ContextualWordEmbeddingsAugmenter,
text_augmenter_kwargs={
"model_path": "bert-base-cased",
"action": "substitute", # or "insert"
},
)
See the full example
here
Refer to nlpaug
docs
and check Textual augmenters examples.
textaugment augmenter
from faker_file.providers.augment_file_from_dir.augmenters import (
textaugment_augmenter,
)
# We assume that directory "/tmp/tmp/" exists and contains
# files of `DOCX`, `EML`, `EPUB`, `ODT`, `PDF`, `RTF` or `TXT`
# formats. Valid values for `action` are: "random_deletion",
# "random_insertion", "random_swap" and "synonym_replacement" (default).
augmented_file = FAKER.augment_file_from_dir(
source_dir_path="/tmp/tmp/",
text_augmenter_cls=textaugment_augmenter.EDATextaugmentAugmenter,
text_augmenter_kwargs={
"action": "synonym_replacement",
},
)
See the full example
here
Using raw=True features in tests
If you pass raw=True
argument to any provider or inner function,
instead of creating a file, you will get bytes
back (or to be
totally correct, bytes
-like object BytesValue
, which is basically
bytes enriched with meta-data). You could then use the bytes
content
of the file to build a test payload as shown in the example test below:
class UploadTestCase(TestCase):
"""Upload test case."""
def test_create_docx_upload(self) -> None:
"""Test create an Upload."""
url = reverse("api:upload-list")
raw = FAKER.docx_file(raw=True)
test_file = BytesIO(raw)
test_file.name = os.path.basename(raw.data["filename"])
payload = {
"name": FAKER.word(),
"description": FAKER.paragraph(),
"file": test_file,
}
response = self.client.post(url, payload, format="json")
# Test if request is handled properly (HTTP 201)
self.assertEqual(response.status_code, HTTP_201_CREATED)
test_upload = Upload.objects.get(id=response.data["id"])
# Test if the name is properly recorded
self.assertEqual(str(test_upload.name), payload["name"])
# Test if file name recorded properly
self.assertEqual(str(test_upload.file.name), test_file.name)
See the full example
here
Create an HTML file from predefined template
If you want to generate a file in a format that is not (yet) supported,
you can try to use GenericFileProvider
. In the following example,
an HTML file is generated from a template.
from faker_file.providers.generic_file import GenericFileProvider
FAKER.add_provider(GenericFileProvider)
generic_file = FAKER.generic_file(
content="<html><body><p>{{text}}</p></body></html>",
extension="html",
)
See the full example
here
Working with storages
AWS S3 storage
from faker_file.storages.aws_s3 import AWSS3Storage
AWS_S3_STORAGE = AWSS3Storage(
bucket_name="your-bucket-name",
root_path="",
rel_path="",
)
txt_file = FAKER.txt_file(storage=AWS_S3_STORAGE)
See the full example
here
Depending on the ORM or framework you’re using, you might want to tweak the
root_path
and rel_path
values. Especially if you store files in
directories (like your-bucket-name/path/to/the/file.ext
).
For instance, if you use Django
and django-storages
, and want to
store the files inside /user/uploads
directory the following would be
correct:
AWS_S3_STORAGE = AWSS3Storage(
bucket_name="your-bucket-name",
root_path="",
rel_path="user/uploads",
)
See the full example
here
Google Cloud Storage
from faker_file.storages.google_cloud_storage import GoogleCloudStorage
GC_STORAGE = GoogleCloudStorage(
bucket_name="your-bucket-name",
root_path="",
rel_path="",
)
# txt_file = FAKER.txt_file(storage=GC_STORAGE)
See the full example
here
Similarly to AWSS3Storage
, if you use Django
and django-storages
,
and want to store the files inside /user/uploads
directory the following
would be correct:
GC_STORAGE = GoogleCloudStorage(
bucket_name="your-bucket-name",
root_path="",
rel_path="user/uploads",
)
See the full example
here
SFTP storage
from faker_file.storages.sftp_storage import SFTPStorage
SFTP_STORAGE = SFTPStorage(
host="your-sftp-host.domain",
port=22,
username="your-sftp-username",
password="your-sftp-password",
root_path="/dir-name",
)
# txt_file = FAKER.txt_file(storage=SFTP_STORAGE)
See the full example
here
When using with Django
(and factory_boy
)
When used with Django (to generate fake data with factory_boy factories),
the root_path argument of the corresponding file storage shall be provided.
Otherwise (although no errors will be triggered) the generated files will
reside outside the MEDIA_ROOT directory (by default in /tmp/ on
Linux) and further operations with those files through Django will cause
a SuspiciousOperation exception.
Basic example
Imaginary Django
model
class Upload(models.Model):
"""Upload model."""
name = models.CharField(max_length=255, unique=True)
description = models.TextField(null=True, blank=True)
# File
file = models.FileField(null=True)
See the full example
here
Correspondent factory_boy
factory
from django.conf import settings
from factory import Faker, Trait
from factory.django import DjangoModelFactory
# Import all providers we want to use
from faker_file.providers.bin_file import BinFileProvider
from faker_file.providers.csv_file import CsvFileProvider
from faker_file.providers.docx_file import DocxFileProvider
from faker_file.providers.eml_file import EmlFileProvider
from faker_file.providers.epub_file import EpubFileProvider
from faker_file.providers.ico_file import IcoFileProvider
from faker_file.providers.jpeg_file import JpegFileProvider
from faker_file.providers.mp3_file import Mp3FileProvider
from faker_file.providers.ods_file import OdsFileProvider
from faker_file.providers.odt_file import OdtFileProvider
from faker_file.providers.pdf_file import PdfFileProvider
from faker_file.providers.png_file import PngFileProvider
from faker_file.providers.pptx_file import PptxFileProvider
from faker_file.providers.rtf_file import RtfFileProvider
from faker_file.providers.svg_file import SvgFileProvider
from faker_file.providers.txt_file import TxtFileProvider
from faker_file.providers.webp_file import WebpFileProvider
from faker_file.providers.xlsx_file import XlsxFileProvider
from faker_file.providers.zip_file import ZipFileProvider
# Import file storage, because we need to customize things in order for it
# to work with Django.
from faker_file.storages.filesystem import FileSystemStorage
from upload.models import Upload
# Add all providers we want to use
Faker.add_provider(BinFileProvider)
Faker.add_provider(CsvFileProvider)
Faker.add_provider(DocxFileProvider)
Faker.add_provider(EmlFileProvider)
Faker.add_provider(EpubFileProvider)
Faker.add_provider(IcoFileProvider)
Faker.add_provider(JpegFileProvider)
Faker.add_provider(Mp3FileProvider)
Faker.add_provider(OdsFileProvider)
Faker.add_provider(OdtFileProvider)
Faker.add_provider(PdfFileProvider)
Faker.add_provider(PngFileProvider)
Faker.add_provider(PptxFileProvider)
Faker.add_provider(RtfFileProvider)
Faker.add_provider(SvgFileProvider)
Faker.add_provider(TxtFileProvider)
Faker.add_provider(WebpFileProvider)
Faker.add_provider(XlsxFileProvider)
Faker.add_provider(ZipFileProvider)
# Define a file storage. When working with Django and FileSystemStorage
# you need to set the value of `root_path` argument to
# `settings.MEDIA_ROOT`.
STORAGE = FileSystemStorage(root_path=settings.MEDIA_ROOT, rel_path="tmp")
class UploadFactory(DjangoModelFactory):
"""Upload factory."""
name = Faker("text", max_nb_chars=100)
description = Faker("text", max_nb_chars=1000)
class Meta:
model = Upload
class Params:
bin_file = Trait(file=Faker("bin_file", storage=STORAGE))
csv_file = Trait(file=Faker("csv_file", storage=STORAGE))
docx_file = Trait(file=Faker("docx_file", storage=STORAGE))
eml_file = Trait(file=Faker("eml_file", storage=STORAGE))
epub_file = Trait(file=Faker("epub_file", storage=STORAGE))
ico_file = Trait(file=Faker("ico_file", storage=STORAGE))
jpeg_file = Trait(file=Faker("jpeg_file", storage=STORAGE))
mp3_file = Trait(file=Faker("mp3_file", storage=STORAGE))
ods_file = Trait(file=Faker("ods_file", storage=STORAGE))
odt_file = Trait(file=Faker("odt_file", storage=STORAGE))
pdf_file = Trait(file=Faker("pdf_file", storage=STORAGE))
png_file = Trait(file=Faker("png_file", storage=STORAGE))
pptx_file = Trait(file=Faker("pptx_file", storage=STORAGE))
rtf_file = Trait(file=Faker("rtf_file", storage=STORAGE))
svg_file = Trait(file=Faker("svg_file", storage=STORAGE))
txt_file = Trait(file=Faker("txt_file", storage=STORAGE))
webp_file = Trait(file=Faker("webp_file", storage=STORAGE))
xlsx_file = Trait(file=Faker("xlsx_file", storage=STORAGE))
zip_file = Trait(file=Faker("zip_file", storage=STORAGE))
And then somewhere in your code:
UploadFactory(bin_file=True) # Upload with BIN file
UploadFactory(docx_file=True) # Upload with DOCX file
UploadFactory(jpeg_file=True) # Upload with JPEG file
UploadFactory(zip_file=True) # Upload with ZIP file
See the full example
here
Randomize provider choice
from random import choice
from factory import Faker, LazyAttribute, Trait
from faker import Faker as OriginalFaker
FAKER = OriginalFaker()
FAKER.add_provider(BinFileProvider)
FAKER.add_provider(CsvFileProvider)
FAKER.add_provider(DocxFileProvider)
FAKER.add_provider(EmlFileProvider)
FAKER.add_provider(EpubFileProvider)
FAKER.add_provider(IcoFileProvider)
FAKER.add_provider(JpegFileProvider)
FAKER.add_provider(Mp3FileProvider)
FAKER.add_provider(OdsFileProvider)
FAKER.add_provider(OdtFileProvider)
FAKER.add_provider(PdfFileProvider)
FAKER.add_provider(PngFileProvider)
FAKER.add_provider(PptxFileProvider)
FAKER.add_provider(RtfFileProvider)
FAKER.add_provider(SvgFileProvider)
FAKER.add_provider(TxtFileProvider)
FAKER.add_provider(WebpFileProvider)
FAKER.add_provider(XlsxFileProvider)
FAKER.add_provider(ZipFileProvider)
def random_file_generator(*args, **kwargs):
random_provider = choice(
[
"bin_file",
"csv_file",
"docx_file",
"eml_file",
"epub_file",
"ico_file",
"jpeg_file",
"mp3_file",
"ods_file",
"odt_file",
"pdf_file",
"png_file",
"pptx_file",
"rtf_file",
"svg_file",
"txt_file",
"webp_file",
"xlsx_file",
"zip_file",
]
)
func = getattr(FAKER, random_provider)
return func(storage=STORAGE)
class UploadFactory(DjangoModelFactory):
"""Upload factory."""
name = Faker("text", max_nb_chars=100)
description = Faker("text", max_nb_chars=1000)
class Meta:
model = Upload
class Params:
random_file = Trait(file=LazyAttribute(random_file_generator))
And then somewhere in your code:
# Upload with random file
upload = UploadFactory(random_file=True)
See the full example
here
Use a different locale
Faker.add_provider(OdtFileProvider)
upload = UploadFactory()
See the full example
here
Other Django usage examples
Faker example with AWS S3 storage
from faker import Faker
from faker_file.storages.aws_s3 import AWSS3Storage
STORAGE = AWSS3Storage(
bucket_name=settings.AWS_STORAGE_BUCKET_NAME,
root_path="",
rel_path="",
)
FAKER = Faker()
FAKER.add_provider(PdfFileProvider)
pdf_file = FAKER.pdf_file(storage=STORAGE)
See the full example
here
factory-boy example with AWS S3 storage
from factory import Faker
from faker_file.providers.pdf_file import PdfFileProvider
from faker_file.storages.aws_s3 import AWSS3Storage
STORAGE = AWSS3Storage(
bucket_name=settings.AWS_STORAGE_BUCKET_NAME,
root_path="",
rel_path="",
)
Faker.add_provider(PdfFileProvider)
class UploadFactory(DjangoModelFactory):
name = Faker("word")
description = Faker("text")
file = Faker("pdf_file", storage=STORAGE)
class Meta:
model = Upload
# Usage example
upload = UploadFactory()
See the full example
here
Flexible storage selection
from django.core.files.storage import default_storage
from faker_file.storages.aws_s3 import AWSS3Storage
from faker_file.storages.filesystem import FileSystemStorage
from storages.backends.s3boto3 import S3Boto3Storage
# Faker doesn't know anything about Django. That's why, if we want to
# support remote storages, we need to manually check which file storage
# backend is used. If `Boto3` storage backend (of the `django-storages`
# package) is used we use the correspondent `AWSS3Storage` class of the
# `faker-file`.
# Otherwise, fall back to native file system storage (`FileSystemStorage`)
# of the `faker-file`.
if isinstance(default_storage, S3Boto3Storage):
STORAGE = AWSS3Storage(
bucket_name=settings.AWS_STORAGE_BUCKET_NAME,
credentials={
"key_id": settings.AWS_ACCESS_KEY_ID,
"key_secret": settings.AWS_SECRET_ACCESS_KEY,
},
root_path="",
rel_path="tmp",
)
else:
STORAGE = FileSystemStorage(
root_path=settings.MEDIA_ROOT,
rel_path="tmp",
)
See the full example
here