From 284d030551d03afa7a29799785472343a2d87fb1 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Wed, 3 Mar 2021 10:36:56 -0500 Subject: [PATCH 01/13] Update build.sh --- .jenkins/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 1e1a06f7ee9..b1220e89b90 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,6 +22,12 @@ pip install -r $DIR/../requirements.txt # export PATH=/opt/conda/bin:$PATH # pip install sphinx==1.8.2 pandas +#Install PyTorch Nightly for test. +# Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html +# RC Link +pip uninstall -y torch torchvision torchaudio torchtext +pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext + # For Tensorboard. Until 1.14 moves to the release channel. pip install tb-nightly From 21bd643725378e20f151fbdb4c162471b665effc Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Wed, 3 Mar 2021 22:41:03 -0500 Subject: [PATCH 02/13] Update audio tutorial for release pytorch 1.8 / torchaudio 0.8 (#1379) * [wip] replace audio tutorial * Update * Update * Update * fixup * Update requirements.txt * update * Update Co-authored-by: Brian Johnson --- .../audio_preprocessing_tutorial.py | 1312 ++++++++++++++--- requirements.txt | 3 + 2 files changed, 1082 insertions(+), 233 deletions(-) diff --git a/beginner_source/audio_preprocessing_tutorial.py b/beginner_source/audio_preprocessing_tutorial.py index 6b34396aef9..b80bb323536 100644 --- a/beginner_source/audio_preprocessing_tutorial.py +++ b/beginner_source/audio_preprocessing_tutorial.py @@ -1,385 +1,1231 @@ """ -Audio I/O and Pre-Processing with torchaudio -============================================ +Audio manipulation with torchaudio +================================== -PyTorch is an open source deep learning platform that provides a -seamless path from research prototyping to production deployment with -GPU support. +``torchaudio`` provides powerful audio I/O functions, preprocessing +transforms and dataset. -Significant effort in solving machine learning problems goes into data -preparation. ``torchaudio`` leverages PyTorch’s GPU support, and provides -many tools to make data loading easy and more readable. In this -tutorial, we will see how to load and preprocess data from a simple -dataset. Please visit -`Audio I/O and Pre-Processing with torchaudio `__ to learn more. - -For this tutorial, please make sure the ``matplotlib`` package is -installed for easier visualization. +In this tutorial, we will look into how to prepare audio data and +extract features that can be fed to NN models. """ -# Uncomment the following line to run in Google Colab -# !pip install torchaudio import torch import torchaudio +import torchaudio.functional as F +import torchaudio.transforms as T + +print(torch.__version__) +print(torchaudio.__version__) + + +###################################################################### +# Preparing data and utility functions (skip this section) +# -------------------------------------------------------- +# + +#@title Prepare data and utility functions. {display-mode: "form"} +#@markdown +#@markdown You do not need to look into this cell. +#@markdown Just execute once and you are good to go. 
+#@markdown +#@markdown In this tutorial, we will use a speech data from [VOiCES dataset](https://iqtlabs.github.io/voices/), which is licensed under Creative Commos BY 4.0. + +#------------------------------------------------------------------------------- +# Preparation of data and helper functions. +#------------------------------------------------------------------------------- +import io +import os +import math +import tarfile +import multiprocessing + +import scipy +import librosa +import boto3 +from botocore import UNSIGNED +from botocore.config import Config import requests +import matplotlib import matplotlib.pyplot as plt +from IPython.display import Audio, display + +[width, height] = matplotlib.rcParams['figure.figsize'] +if width < 10: + matplotlib.rcParams['figure.figsize'] = [width * 2.5, height] + +_SAMPLE_DIR = "_sample_data" +SAMPLE_WAV_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.wav" +SAMPLE_WAV_PATH = os.path.join(_SAMPLE_DIR, "steam.wav") + +SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" +SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav") + +SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav" +SAMPLE_RIR_PATH = os.path.join(_SAMPLE_DIR, "rir.wav") + +SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav" +SAMPLE_NOISE_PATH = os.path.join(_SAMPLE_DIR, "bg.wav") + +SAMPLE_MP3_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.mp3" +SAMPLE_MP3_PATH = os.path.join(_SAMPLE_DIR, "steam.mp3") + +SAMPLE_GSM_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.gsm" +SAMPLE_GSM_PATH = os.path.join(_SAMPLE_DIR, "steam.gsm") + +SAMPLE_TAR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit.tar.gz" +SAMPLE_TAR_PATH = os.path.join(_SAMPLE_DIR, "sample.tar.gz") +SAMPLE_TAR_ITEM = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" + +S3_BUCKET = "pytorch-tutorial-assets" +S3_KEY = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" + +YESNO_DATASET_PATH = os.path.join(_SAMPLE_DIR, "yes_no") +os.makedirs(YESNO_DATASET_PATH, exist_ok=True) +os.makedirs(_SAMPLE_DIR, exist_ok=True) + +def _fetch_data(): + uri = [ + (SAMPLE_WAV_URL, SAMPLE_WAV_PATH), + (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH), + (SAMPLE_RIR_URL, SAMPLE_RIR_PATH), + (SAMPLE_NOISE_URL, SAMPLE_NOISE_PATH), + (SAMPLE_MP3_URL, SAMPLE_MP3_PATH), + (SAMPLE_GSM_URL, SAMPLE_GSM_PATH), + (SAMPLE_TAR_URL, SAMPLE_TAR_PATH), + ] + for url, path in uri: + with open(path, 'wb') as file_: + file_.write(requests.get(url).content) + +_fetch_data() + +def _download_yesno(): + if os.path.exists(os.path.join(YESNO_DATASET_PATH, "waves_yesno.tar.gz")): + return + torchaudio.datasets.YESNO(root=YESNO_DATASET_PATH, download=True) + +YESNO_DOWNLOAD_PROCESS = multiprocessing.Process(target=_download_yesno) +YESNO_DOWNLOAD_PROCESS.start() + +def _get_sample(path, resample=None): + effects = [ + ["remix", "1"] + ] + if resample: + effects.append(["rate", f'{resample}']) + return torchaudio.sox_effects.apply_effects_file(path, effects=effects) + +def get_speech_sample(*, resample=None): + return 
_get_sample(SAMPLE_WAV_SPEECH_PATH, resample=resample) + +def get_sample(*, resample=None): + return _get_sample(SAMPLE_WAV_PATH, resample=resample) + +def get_rir_sample(*, resample=None, processed=False): + rir_raw, sample_rate = _get_sample(SAMPLE_RIR_PATH, resample=resample) + if not processed: + return rir_raw, sample_rate + rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] + rir = rir / torch.norm(rir, p=2) + rir = torch.flip(rir, [1]) + return rir, sample_rate + +def get_noise_sample(*, resample=None): + return _get_sample(SAMPLE_NOISE_PATH, resample=resample) + +def print_metadata(metadata, src=None): + if src: + print("-" * 10) + print("Source:", src) + print("-" * 10) + print(" - sample_rate:", metadata.sample_rate) + print(" - num_channels:", metadata.num_channels) + print(" - num_frames:", metadata.num_frames) + print(" - bits_per_sample:", metadata.bits_per_sample) + print(" - encoding:", metadata.encoding) + print() + +def print_stats(waveform, sample_rate=None, src=None): + if src: + print("-" * 10) + print("Source:", src) + print("-" * 10) + if sample_rate: + print("Sample Rate:", sample_rate) + print("Shape:", tuple(waveform.shape)) + print("Dtype:", waveform.dtype) + print(f" - Max: {waveform.max().item():6.3f}") + print(f" - Min: {waveform.min().item():6.3f}") + print(f" - Mean: {waveform.mean().item():6.3f}") + print(f" - Std Dev: {waveform.std().item():6.3f}") + print() + print(waveform) + print() + +def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + time_axis = torch.arange(0, num_frames) / sample_rate + + figure, axes = plt.subplots(num_channels, 1) + if num_channels == 1: + axes = [axes] + for c in range(num_channels): + axes[c].plot(time_axis, waveform[c], linewidth=1) + axes[c].grid(True) + if num_channels > 1: + axes[c].set_ylabel(f'Channel {c+1}') + if xlim: + axes[c].set_xlim(xlim) + if ylim: + axes[c].set_ylim(ylim) + figure.suptitle(title) + plt.show(block=False) + +def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + time_axis = torch.arange(0, num_frames) / sample_rate + + figure, axes = plt.subplots(num_channels, 1) + if num_channels == 1: + axes = [axes] + for c in range(num_channels): + axes[c].specgram(waveform[c], Fs=sample_rate) + if num_channels > 1: + axes[c].set_ylabel(f'Channel {c+1}') + if xlim: + axes[c].set_xlim(xlim) + figure.suptitle(title) + plt.show(block=False) + +def play_audio(waveform, sample_rate): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + if num_channels == 1: + display(Audio(waveform[0], rate=sample_rate)) + elif num_channels == 2: + display(Audio((waveform[0], waveform[1]), rate=sample_rate)) + else: + raise ValueError("Waveform with more than 2 channels are not supported.") + +def inspect_file(path): + print("-" * 10) + print("Source:", path) + print("-" * 10) + print(f" - File size: {os.path.getsize(path)} bytes") + print_metadata(torchaudio.info(path)) + +def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None): + fig, axs = plt.subplots(1, 1) + axs.set_title(title or 'Spectrogram (db)') + axs.set_ylabel(ylabel) + axs.set_xlabel('frame') + im = axs.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect) + if xmax: + axs.set_xlim((0, xmax)) + fig.colorbar(im, ax=axs) + plt.show(block=False) + +def plot_mel_fbank(fbank, title=None): + fig, axs = 
plt.subplots(1, 1) + axs.set_title(title or 'Filter bank') + axs.imshow(fbank, aspect='auto') + axs.set_ylabel('frequency bin') + axs.set_xlabel('mel bin') + plt.show(block=False) + +def get_spectrogram( + n_fft = 400, + win_len = None, + hop_len = None, + power = 2.0, +): + waveform, _ = get_speech_sample() + spectrogram = T.Spectrogram( + n_fft=n_fft, + win_length=win_len, + hop_length=hop_len, + center=True, + pad_mode="reflect", + power=power, + ) + return spectrogram(waveform) + +def plot_pitch(waveform, sample_rate, pitch): + figure, axis = plt.subplots(1, 1) + axis.set_title("Pitch Feature") + axis.grid(True) + + end_time = waveform.shape[1] / sample_rate + time_axis = torch.linspace(0, end_time, waveform.shape[1]) + axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3) + + axis2 = axis.twinx() + time_axis = torch.linspace(0, end_time, pitch.shape[1]) + ln2 = axis2.plot( + time_axis, pitch[0], linewidth=2, label='Pitch', color='green') + + axis2.legend(loc=0) + plt.show(block=False) + +def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): + figure, axis = plt.subplots(1, 1) + axis.set_title("Kaldi Pitch Feature") + axis.grid(True) + + end_time = waveform.shape[1] / sample_rate + time_axis = torch.linspace(0, end_time, waveform.shape[1]) + axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3) + + time_axis = torch.linspace(0, end_time, pitch.shape[1]) + ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green') + axis.set_ylim((-1.3, 1.3)) + + axis2 = axis.twinx() + time_axis = torch.linspace(0, end_time, nfcc.shape[1]) + ln2 = axis2.plot( + time_axis, nfcc[0], linewidth=2, label='NFCC', color='blue', linestyle='--') + + lns = ln1 + ln2 + labels = [l.get_label() for l in lns] + axis.legend(lns, labels, loc=0) + plt.show(block=False) + ###################################################################### -# Opening a file -# ----------------- +# Audio I/O +# ========= # -# ``torchaudio`` also supports loading sound files in the wav and mp3 format. We -# call waveform the resulting raw audio signal. +# torchaudio integrates ``libsox`` and provides a rich set of audio I/O. # -url = "https://pytorch.org/tutorials/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav" -r = requests.get(url) -with open('steam-train-whistle-daniel_simon-converted-from-mp3.wav', 'wb') as f: - f.write(r.content) +###################################################################### +# Quering audio metadata +# ---------------------- +# +# ``torchaudio.info`` function fetches metadata of audio. You can provide +# a path-like object or file-like object. 
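For instance, here is a minimal sketch of putting the returned metadata to use by deriving the clip duration from ``num_frames`` and ``sample_rate``. The path below is illustrative rather than one of the tutorial assets:

import torchaudio

# Hypothetical path, replace with any local WAV file.
some_path = "some_file.wav"

info = torchaudio.info(some_path)
# num_frames can be 0 for some compressed or variable-bit-rate formats,
# so guard the division when working with arbitrary files.
if info.num_frames:
    duration = info.num_frames / info.sample_rate
    print(f"{some_path}: {duration:.2f} seconds, "
          f"{info.num_channels} channel(s) at {info.sample_rate} Hz")
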
+# -filename = "steam-train-whistle-daniel_simon-converted-from-mp3.wav" -waveform, sample_rate = torchaudio.load(filename) +metadata = torchaudio.info(SAMPLE_WAV_PATH) +print_metadata(metadata, src=SAMPLE_WAV_PATH) -print("Shape of waveform: {}".format(waveform.size())) -print("Sample rate of waveform: {}".format(sample_rate)) -plt.figure() -plt.plot(waveform.t().numpy()) +###################################################################### +# Where +# +# - ``sample_rate`` is the sampling rate of the audio +# - ``num_channels`` is the number of channels +# - ``num_frames`` is the number of frames per channel +# - ``bits_per_sample`` is bit depth +# - ``encoding`` is the sample coding format +# +# The values ``encoding`` can take are one of the following +# +# - ``"PCM_S"``: Signed integer linear PCM +# - ``"PCM_U"``: Unsigned integer linear PCM +# - ``"PCM_F"``: Floating point linear PCM +# - ``"FLAC"``: Flac, `Free Lossless Audio +# Codec `__ +# - ``"ULAW"``: Mu-law, +# [`wikipedia `__] +# - ``"ALAW"``: A-law +# [`wikipedia `__] +# - ``"MP3"`` : MP3, MPEG-1 Audio Layer III +# - ``"VORBIS"``: OGG Vorbis [`xiph.org `__] +# - ``"AMR_NB"``: Adaptive Multi-Rate +# [`wikipedia `__] +# - ``"AMR_WB"``: Adaptive Multi-Rate Wideband +# [`wikipedia `__] +# - ``"OPUS"``: Opus [`opus-codec.org `__] +# - ``"GSM"``: GSM-FR +# [`wikipedia `__] +# - ``"UNKNOWN"`` None of avobe +# + ###################################################################### -# When you load a file in ``torchaudio``, you can optionally specify the backend to use either -# `SoX `_ or `SoundFile `_ -# via ``torchaudio.set_audio_backend``. These backends are loaded lazily when needed. +# **Note** +# +# - ``bits_per_sample`` can be ``0`` for formats with compression and/or +# variable bit rate. (such as mp3) +# - ``num_frames`` can be ``0`` for GSM-FR format. # -# ``torchaudio`` also makes JIT compilation optional for functions, and uses ``nn.Module`` where possible. + +metadata = torchaudio.info(SAMPLE_MP3_PATH) +print_metadata(metadata, src=SAMPLE_MP3_PATH) + +metadata = torchaudio.info(SAMPLE_GSM_PATH) +print_metadata(metadata, src=SAMPLE_GSM_PATH) + ###################################################################### -# Transformations -# --------------- +# Querying file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~ # -# ``torchaudio`` supports a growing list of -# `transformations `_. +# ``info`` function works on file-like object as well. # -# - **Resample**: Resample waveform to a different sample rate. -# - **Spectrogram**: Create a spectrogram from a waveform. -# - **GriffinLim**: Compute waveform from a linear scale magnitude spectrogram using -# the Griffin-Lim transformation. -# - **ComputeDeltas**: Compute delta coefficients of a tensor, usually a spectrogram. -# - **ComplexNorm**: Compute the norm of a complex tensor. -# - **MelScale**: This turns a normal STFT into a Mel-frequency STFT, -# using a conversion matrix. -# - **AmplitudeToDB**: This turns a spectrogram from the -# power/amplitude scale to the decibel scale. -# - **MFCC**: Create the Mel-frequency cepstrum coefficients from a -# waveform. -# - **MelSpectrogram**: Create MEL Spectrograms from a waveform using the -# STFT function in PyTorch. -# - **MuLawEncoding**: Encode waveform based on mu-law companding. -# - **MuLawDecoding**: Decode mu-law encoded waveform. -# - **TimeStretch**: Stretch a spectrogram in time without modifying pitch for a given rate. -# - **FrequencyMasking**: Apply masking to a spectrogram in the frequency domain. 
-# - **TimeMasking**: Apply masking to a spectrogram in the time domain. -# -# Each transform supports batching: you can perform a transform on a single raw -# audio signal or spectrogram, or many of the same shape. + +with requests.get(SAMPLE_WAV_URL, stream=True) as response: + metadata = torchaudio.info(response.raw) +print_metadata(metadata, src=SAMPLE_WAV_URL) + + +###################################################################### +# **Note** When passing file-like object, ``info`` function does not read +# all the data, instead it only reads the beginning portion of data. +# Therefore, depending on the audio format, it cannot get the correct +# metadata, including the format itself. The following example illustrates +# this. # -# Since all transforms are ``nn.Modules`` or ``jit.ScriptModules``, they can be -# used as part of a neural network at any point. +# - Use ``format`` argument to tell what audio format it is. +# - The returned metadata has ``num_frames = 0`` # +with requests.get(SAMPLE_MP3_URL, stream=True) as response: + metadata = torchaudio.info(response.raw, format="mp3") + + print(f"Fetched {response.raw.tell()} bytes.") +print_metadata(metadata, src=SAMPLE_MP3_URL) + + +###################################################################### +# Loading audio data into Tensor +# ------------------------------ +# +# To load audio data, you can use ``torchaudio.load``. +# +# This function accepts path-like object and file-like object. +# +# The returned value is a tuple of waveform (``Tensor``) and sample rate +# (``int``). +# +# By default, the resulting tensor object has ``dtype=torch.float32`` and +# its value range is normalized within ``[-1.0, 1.0]``. +# +# For the list of supported format, please refer to `the torchaudio +# documentation `__. +# + +waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH) + +print_stats(waveform, sample_rate=sample_rate) +plot_waveform(waveform, sample_rate) +plot_specgram(waveform, sample_rate) +play_audio(waveform, sample_rate) + + ###################################################################### -# To start, we can look at the log of the spectrogram on a log scale. +# Loading from file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# ``torchaudio``\ ’s I/O functions now support file-like object. This +# allows to fetch audio data and decode at the same time from the location +# other than local file system. The following examples illustrates this. +# + +# Load audio data as HTTP request +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform, sample_rate = torchaudio.load(response.raw) +plot_specgram(waveform, sample_rate, title="HTTP datasource") + +# Load audio from tar file +with tarfile.open(SAMPLE_TAR_PATH, mode='r') as tarfile_: + fileobj = tarfile_.extractfile(SAMPLE_TAR_ITEM) + waveform, sample_rate = torchaudio.load(fileobj) +plot_specgram(waveform, sample_rate, title="TAR file") + +# Load audio from S3 +client = boto3.client('s3', config=Config(signature_version=UNSIGNED)) +response = client.get_object(Bucket=S3_BUCKET, Key=S3_KEY) +waveform, sample_rate = torchaudio.load(response['Body']) +plot_specgram(waveform, sample_rate, title="From S3") + + + +###################################################################### +# Tips on slicing +# ~~~~~~~~~~~~~~~ +# +# Providing ``num_frames`` and ``frame_offset`` arguments will slice the +# resulting Tensor object while decoding. +# +# The same result can be achieved using the regular Tensor slicing, +# (i.e. 
``waveform[:, frame_offset:frame_offset+num_frames]``) however, +# providing ``num_frames`` and ``frame_offset`` arguments is more +# efficient. +# +# This is because the function will stop data acquisition and decoding +# once it finishes decoding the requested frames. This is advantageous +# when the audio data are transfered via network as the data transfer will +# stop as soon as the necessary amount of data is fetched. +# +# The following example illustrates this; # -specgram = torchaudio.transforms.Spectrogram()(waveform) +# Illustration of two different decoding methods. +# The first one will fetch all the data and decode them, while +# the second one will stop fetching data once it completes decoding. +# The resulting waveforms are identical. -print("Shape of spectrogram: {}".format(specgram.size())) +frame_offset, num_frames = 16000, 16000 # Fetch and decode the 1 - 2 seconds + +print("Fetching all the data...") +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform1, sample_rate1 = torchaudio.load(response.raw) + waveform1 = waveform1[:, frame_offset:frame_offset+num_frames] + print(f" - Fetched {response.raw.tell()} bytes") + +print("Fetching until the requested frames are available...") +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform2, sample_rate2 = torchaudio.load( + response.raw, frame_offset=frame_offset, num_frames=num_frames) + print(f" - Fetched {response.raw.tell()} bytes") + +print("Checking the resulting waveform ... ", end="") +assert (waveform1 == waveform2).all() +print("matched!") -plt.figure() -plt.imshow(specgram.log2()[0,:,:].numpy(), cmap='gray') ###################################################################### -# Or we can look at the Mel Spectrogram on a log scale. +# Saving audio to file +# -------------------- +# +# To save audio data in the formats intepretable by common applications, +# you can use ``torchaudio.save``. +# +# This function accepts path-like object and file-like object. +# +# When passing file-like object, you also need to provide ``format`` +# argument so that the function knows which format it should be using. In +# case of path-like object, the function will detemine the format based on +# the extension. If you are saving to a file without extension, you need +# to provide ``format`` argument. +# +# When saving as WAV format, the default encoding for ``float32`` Tensor +# is 32-bit floating-point PCM. You can provide ``encoding`` and +# ``bits_per_sample`` argument to change this. For example, to save data +# in 16 bit signed integer PCM, you can do the following. # +# **Note** Saving data in encodings with lower bit depth reduces the +# resulting file size but loses precision. +# + +waveform, sample_rate = get_sample() +print_stats(waveform, sample_rate=sample_rate) -specgram = torchaudio.transforms.MelSpectrogram()(waveform) +# Save without any encoding option. 
+# The function will pick up the encoding which +# the provided data fit +path = "save_example_default.wav" +torchaudio.save(path, waveform, sample_rate) +inspect_file(path) -print("Shape of spectrogram: {}".format(specgram.size())) +# Save as 16-bit signed integer Linear PCM +# The resulting file occupies half the storage but loses precision +path = "save_example_PCM_S16.wav" +torchaudio.save( + path, waveform, sample_rate, + encoding="PCM_S", bits_per_sample=16) +inspect_file(path) -plt.figure() -p = plt.imshow(specgram.log2()[0,:,:].detach().numpy(), cmap='gray') ###################################################################### -# We can resample the waveform, one channel at a time. +# ``torchaudio.save`` can also handle other formats. To name a few; # -new_sample_rate = sample_rate/10 +waveform, sample_rate = get_sample() -# Since Resample applies to a single channel, we resample first channel here -channel = 0 -transformed = torchaudio.transforms.Resample(sample_rate, new_sample_rate)(waveform[channel,:].view(1,-1)) +formats = [ + "mp3", + "flac", + "vorbis", + "sph", + "amb", + "amr-nb", + "gsm", +] -print("Shape of transformed waveform: {}".format(transformed.size())) +for format in formats: + path = f"save_example.{format}" + torchaudio.save(path, waveform, sample_rate, format=format) + inspect_file(path) -plt.figure() -plt.plot(transformed[0,:].numpy()) ###################################################################### -# As another example of transformations, we can encode the signal based on -# Mu-Law enconding. But to do so, we need the signal to be between -1 and -# 1. Since the tensor is just a regular PyTorch tensor, we can apply -# standard operators on it. +# Saving to file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ # +# Similar to the other I/O functions, you can save audio into file-like +# object. When saving to file-like object, ``format`` argument is +# required. +# + +waveform, sample_rate = get_sample() -# Let's check if the tensor is in the interval [-1,1] -print("Min of waveform: {}\nMax of waveform: {}\nMean of waveform: {}".format(waveform.min(), waveform.max(), waveform.mean())) +# Saving to Bytes buffer +buffer_ = io.BytesIO() +torchaudio.save(buffer_, waveform, sample_rate, format="wav") + +buffer_.seek(0) +print(buffer_.read(16)) ###################################################################### -# Since the waveform is already between -1 and 1, we do not need to -# normalize it. +# Data Augmentation +# ================= +# +# ``torchaudio`` provides a variety of ways to augment audio data. # -def normalize(tensor): - # Subtract the mean, and scale to the interval [-1,1] - tensor_minusmean = tensor - tensor.mean() - return tensor_minusmean/tensor_minusmean.abs().max() -# Let's normalize to the full interval [-1,1] -# waveform = normalize(waveform) +###################################################################### +# Applying effects and filtering +# ------------------------------ +# +# ``torchaudio.sox_effects`` module provides ways to apply filiters like +# ``sox`` command on Tensor objects and file-object audio sources +# directly. +# +# There are two functions for this; +# +# - ``torchaudio.sox_effects.apply_effects_tensor`` for applying effects +# on Tensor +# - ``torchaudio.sox_effects.apply_effects_file`` for applying effects on +# other audio source +# +# Both function takes effects in the form of ``List[List[str]]``. 
This +# mostly corresponds to how ``sox`` command works, but one caveat is that +# ``sox`` command adds some effects automatically, but torchaudio’s +# implementation does not do that. +# +# For the list of available effects, please refer to `the sox +# documentation `__. +# +# **Tip** If you need to load and resample your audio data on-the-fly, +# then you can use ``torchaudio.sox_effects.apply_effects_file`` with +# ``"rate"`` effect. +# +# **Note** ``apply_effects_file`` accepts file-like object or path-like +# object. Similar to ``torchaudio.load``, when the audio format cannot be +# detected from either file extension or header, you can provide +# ``format`` argument to tell what format the audio source is. +# +# **Note** This process is not differentiable. +# + +# Load the data +waveform1, sample_rate1 = get_sample(resample=16000) + +# Define effects +effects = [ + ["lowpass", "-1", "300"], # apply single-pole lowpass filter + ["speed", "0.8"], # reduce the speed + # This only changes sample rate, so it is necessary to + # add `rate` effect with original sample rate after this. + ["rate", f"{sample_rate1}"], + ["reverb", "-w"], # Reverbration gives some dramatic feeling +] + +# Apply effects +waveform2, sample_rate2 = torchaudio.sox_effects.apply_effects_tensor( + waveform1, sample_rate1, effects) + +plot_waveform(waveform1, sample_rate1, title="Original", xlim=(-.1, 3.2)) +plot_waveform(waveform2, sample_rate2, title="Effects Applied", xlim=(-.1, 3.2)) +print_stats(waveform1, sample_rate=sample_rate1, src="Original") +print_stats(waveform2, sample_rate=sample_rate2, src="Effects Applied") + ###################################################################### -# Let’s apply encode the waveform. +# Note that the number of frames and number of channels are different from +# the original after the effects. Let’s listen to the audio. Doesn’t it +# sound more dramatic? # -transformed = torchaudio.transforms.MuLawEncoding()(waveform) +plot_specgram(waveform1, sample_rate1, title="Original", xlim=(0, 3.04)) +play_audio(waveform1, sample_rate1) +plot_specgram(waveform2, sample_rate2, title="Effects Applied", xlim=(0, 3.04)) +play_audio(waveform2, sample_rate2) + -print("Shape of transformed waveform: {}".format(transformed.size())) +###################################################################### +# Simulating room reverbration +# ---------------------------- +# +# `Convolution +# reverb `__ is a +# technique used to make a clean audio data sound like in a different +# environment. +# +# Using Room Impulse Response (RIR), we can make a clean speech sound like +# uttered in a conference room. +# +# For this process, we need RIR data. The following data are from VOiCES +# dataset, but you can record one by your self. Just turn on microphone +# and clap you hands. +# -plt.figure() -plt.plot(transformed[0,:].numpy()) +sample_rate = 8000 + +rir_raw, _ = get_rir_sample(resample=sample_rate) + +plot_waveform(rir_raw, sample_rate, title="Room Impulse Response (raw)", ylim=None) +plot_specgram(rir_raw, sample_rate, title="Room Impulse Response (raw)") +play_audio(rir_raw, sample_rate) ###################################################################### -# And now decode. +# First, we need to clean up the RIR. We extract the main impulse, +# normalize the signal power, then flip the time axis. 
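The flip matters because ``torch.nn.functional.conv1d`` computes cross-correlation rather than convolution, so reversing the RIR along the time axis makes the upcoming ``conv1d`` call behave as a true convolution. A small toy check of that equivalence, using illustrative values rather than tutorial data:

import torch

signal = torch.tensor([[[1., 2., 3., 4.]]])  # shape (batch, channel, time)
kernel = torch.tensor([[[1., 0., -1.]]])     # a toy impulse response

# conv1d slides the kernel without reversing it (cross-correlation).
xcorr = torch.nn.functional.conv1d(signal, kernel)

# Flipping the kernel along the time axis gives the textbook convolution.
conv = torch.nn.functional.conv1d(signal, torch.flip(kernel, [2]))

print(xcorr)  # tensor([[[-2., -2.]]])
print(conv)   # tensor([[[2., 2.]]])
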
# -reconstructed = torchaudio.transforms.MuLawDecoding()(transformed) +rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] +rir = rir / torch.norm(rir, p=2) +rir = torch.flip(rir, [1]) -print("Shape of recovered waveform: {}".format(reconstructed.size())) +print_stats(rir) +plot_waveform(rir, sample_rate, title="Room Impulse Response", ylim=None) -plt.figure() -plt.plot(reconstructed[0,:].numpy()) ###################################################################### -# We can finally compare the original waveform with its reconstructed -# version. +# Then we convolve the speech signal with the RIR filter. # -# Compute median relative difference -err = ((waveform-reconstructed).abs() / waveform.abs()).median() +speech, _ = get_speech_sample(resample=sample_rate) + +speech_ = torch.nn.functional.pad(speech, (rir.shape[1]-1, 0)) +augmented = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0] -print("Median relative difference between original and MuLaw reconstucted signals: {:.2%}".format(err)) +plot_waveform(speech, sample_rate, title="Original", ylim=None) +plot_waveform(augmented, sample_rate, title="RIR Applied", ylim=None) + +plot_specgram(speech, sample_rate, title="Original") +play_audio(speech, sample_rate) + +plot_specgram(augmented, sample_rate, title="RIR Applied") +play_audio(augmented, sample_rate) ###################################################################### -# Functional -# --------------- +# Adding background noise +# ----------------------- +# +# To add background noise to audio data, you can simply add audio Tensor +# and noise Tensor. A commonly way to adjust the intensity of noise is to +# change Signal-to-Noise Ratio (SNR). +# [`wikipedia `__] +# +# .. math:: +# +# +# \mathrm{SNR} = \frac{P_\mathrm{signal}}{P_\mathrm{noise}} +# +# .. math:: +# +# +# {\mathrm {SNR_{{dB}}}}=10\log _{{10}}\left({\mathrm {SNR}}\right) +# + +sample_rate = 8000 +speech, _ = get_speech_sample(resample=sample_rate) +noise, _ = get_noise_sample(resample=sample_rate) +noise = noise[:, :speech.shape[1]] + +plot_waveform(noise, sample_rate, title="Background noise") +plot_specgram(noise, sample_rate, title="Background noise") +play_audio(noise, sample_rate) + +speech_power = speech.norm(p=2) +noise_power = noise.norm(p=2) + +for snr_db in [20, 10, 3]: + snr = math.exp(snr_db / 10) + scale = snr * noise_power / speech_power + noisy_speech = (scale * speech + noise) / 2 + + plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") + plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") + play_audio(noisy_speech, sample_rate) + + + +###################################################################### +# Applying codec to Tensor object +# ------------------------------- # -# The transformations seen above rely on lower level stateless functions for their computations. -# These functions are available under ``torchaudio.functional``. The complete list is available -# `here `_ and includes: -# -# - **istft**: Inverse short time Fourier Transform. -# - **gain**: Applies amplification or attenuation to the whole waveform. -# - **dither**: Increases the perceived dynamic range of audio stored at a -# particular bit-depth. -# - **compute_deltas**: Compute delta coefficients of a tensor. -# - **equalizer_biquad**: Design biquad peaking equalizer filter and perform filtering. -# - **lowpass_biquad**: Design biquad lowpass filter and perform filtering. -# - **highpass_biquad**:Design biquad highpass filter and perform filtering. 
+# ``torchaudio.functional.apply_codec`` can apply codecs to Tensor object. # -# For example, let's try the `mu_law_encoding` functional: +# **Note** This process is not differentiable. +# + +waveform, sample_rate = get_speech_sample(resample=8000) + +plot_specgram(waveform, sample_rate, title="Original") +play_audio(waveform, sample_rate) -mu_law_encoding_waveform = torchaudio.functional.mu_law_encoding(waveform, quantization_channels=256) +configs = [ + ({"format": "wav", "encoding": 'ULAW', "bits_per_sample": 8}, "8 bit mu-law"), + ({"format": "gsm"}, "GSM-FR"), + ({"format": "mp3", "compression": -9}, "MP3"), + ({"format": "vorbis", "compression": -1}, "Vorbis"), +] +for param, title in configs: + augmented = F.apply_codec(waveform, sample_rate, **param) + plot_specgram(augmented, sample_rate, title=title) + play_audio(augmented, sample_rate) -print("Shape of transformed waveform: {}".format(mu_law_encoding_waveform.size())) -plt.figure() -plt.plot(mu_law_encoding_waveform[0,:].numpy()) ###################################################################### -# You can see how the output from ``torchaudio.functional.mu_law_encoding`` is the same as -# the output from ``torchaudio.transforms.MuLawEncoding``. -# -# Now let's experiment with a few of the other functionals and visualize their output. Taking our -# spectogram, we can compute it's deltas: +# Simulating a phone recoding +# --------------------------- +# +# Combining the previous techniques, we can simulate audio that sounds +# like a person talking over a phone in a echoey room with people talking +# in the background. +# + +sample_rate = 16000 +speech, _ = get_speech_sample(resample=sample_rate) + +plot_specgram(speech, sample_rate, title="Original") +play_audio(speech, sample_rate) + +# Apply RIR +rir, _ = get_rir_sample(resample=sample_rate, processed=True) +speech_ = torch.nn.functional.pad(speech, (rir.shape[1]-1, 0)) +speech = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0] + +plot_specgram(speech, sample_rate, title="RIR Applied") +play_audio(speech, sample_rate) + +# Add background noise +# Because the noise is recorded in the actual environment, we consider that +# the noise contains the acoustic feature of the environment. Therefore, we add +# the noise after RIR application. +noise, _ = get_noise_sample(resample=sample_rate) +noise = noise[:, :speech.shape[1]] + +snr_db = 8 +scale = math.exp(snr_db / 10) * noise.norm(p=2) / speech.norm(p=2) +speech = (scale * speech + noise) / 2 + +plot_specgram(speech, sample_rate, title="BG noise added") +play_audio(speech, sample_rate) + +# Apply filtering and change sample rate +speech, sample_rate = torchaudio.sox_effects.apply_effects_tensor( + speech, + sample_rate, + effects=[ + ["lowpass", "4000"], + ["compand", "0.02,0.05", "-60,-60,-30,-10,-20,-8,-5,-8,-2,-8", "-8", "-7", "0.05"], + ["rate", "8000"], + ], +) + +plot_specgram(speech, sample_rate, title="Filtered") +play_audio(speech, sample_rate) + +# Apply telephony codec +speech = F.apply_codec(speech, sample_rate, format="gsm") + +plot_specgram(speech, sample_rate, title="GSM Codec Applied") +play_audio(speech, sample_rate) -computed = torchaudio.functional.compute_deltas(specgram.contiguous(), win_length=3) -print("Shape of computed deltas: {}".format(computed.shape)) -plt.figure() -plt.imshow(computed.log2()[0,:,:].detach().numpy(), cmap='gray') ###################################################################### -# We can take the original waveform and apply different effects to it. 
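As a closing note on the ``sox_effects`` API used by the helper functions in this section, the same kind of effect chain can also be applied while loading from disk, which is what the earlier tip about on-the-fly resampling refers to. A minimal sketch, assuming ``some_file.wav`` is any local audio file:

import torchaudio

# Illustrative path, replace with any local audio file.
path = "some_file.wav"

effects = [
    ["remix", "1"],     # mix down to a single channel
    ["rate", "16000"],  # resample to 16 kHz while loading
]
waveform, sample_rate = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
print(waveform.shape, sample_rate)
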
-# +# Feature Extractions +# =================== +# +# ``torchaudio`` implements feature extractions commonly used in audio +# domain. They are available in ``torchaudio.functional`` and +# ``torchaudio.transforms``. +# +# ``functional`` module implements features as a stand alone functions. +# They are stateless. +# +# ``transforms`` module implements features in object-oriented manner, +# using implementations from ``functional`` and ``torch.nn.Module``. +# +# Because all the transforms are subclass of ``torch.nn.Module``, they can +# be serialized using TorchScript. +# +# For the complete list of available features, please refer to the +# documentation. In this tutorial, we will look into conversion between +# time domain and frequency domain (``Spectrogram``, ``GriffinLim``, +# ``MelSpectrogram``) and augmentation technique called SpecAugment. +# + + +###################################################################### +# Spectrogram +# ----------- +# +# To get the frequency representation of audio signal, you can use +# ``Spectrogram`` transform. +# + +waveform, sample_rate = get_speech_sample() + +n_fft = 1024 +win_length = None +hop_length = 512 + +# define transformation +spectrogram = T.Spectrogram( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + center=True, + pad_mode="reflect", + power=2.0, +) +# Perform transformation +spec = spectrogram(waveform) + +print_stats(spec) +plot_spectrogram(spec[0], title='torchaudio') + + + +###################################################################### +# GriffinLim +# ---------- +# +# To recover a waveform from spectrogram, you can use ``GriffinLim``. +# + +torch.random.manual_seed(0) +waveform, sample_rate = get_speech_sample() +plot_waveform(waveform, sample_rate, title="Original") +play_audio(waveform, sample_rate) + +n_fft = 1024 +win_length = None +hop_length = 512 + +spec = T.Spectrogram( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, +)(waveform) + +griffin_lim = T.GriffinLim( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, +) +waveform = griffin_lim(spec) + +plot_waveform(waveform, sample_rate, title="Reconstructed") +play_audio(waveform, sample_rate) + + + +###################################################################### +# Mel Filter Bank +# --------------- +# +# ``torchaudio.functional.create_fb_matrix`` can generate the filter bank +# to convert frequency bins to Mel-scale bins. +# +# Since this function does not require input audio/features, there is no +# equivalent transform in ``torchaudio.transforms``. +# + +n_fft = 256 +n_mels = 64 +sample_rate = 6000 + +mel_filters = F.create_fb_matrix( + int(n_fft // 2 + 1), + n_mels=n_mels, + f_min=0., + f_max=sample_rate/2., + sample_rate=sample_rate, + norm='slaney' +) +plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio") -gain_waveform = torchaudio.functional.gain(waveform, gain_db=5.0) -print("Min of gain_waveform: {}\nMax of gain_waveform: {}\nMean of gain_waveform: {}".format(gain_waveform.min(), gain_waveform.max(), gain_waveform.mean())) -dither_waveform = torchaudio.functional.dither(waveform) -print("Min of dither_waveform: {}\nMax of dither_waveform: {}\nMean of dither_waveform: {}".format(dither_waveform.min(), dither_waveform.max(), dither_waveform.mean())) ###################################################################### -# Another example of the capabilities in ``torchaudio.functional`` are applying filters to our -# waveform. 
Applying the lowpass biquad filter to our waveform will output a new waveform with -# the signal of the frequency modified. +# Comparison against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# As a comparison, here is the equivalent way to get the mel filter bank +# with ``librosa``. +# +# **Note** Currently, the result matches only when ``htk=True``. +# ``torchaudio`` does not support the equivalent of ``htk=False`` option. +# -lowpass_waveform = torchaudio.functional.lowpass_biquad(waveform, sample_rate, cutoff_freq=3000) +mel_filters_librosa = librosa.filters.mel( + sample_rate, + n_fft, + n_mels=n_mels, + fmin=0., + fmax=sample_rate/2., + norm='slaney', + htk=True, +).T -print("Min of lowpass_waveform: {}\nMax of lowpass_waveform: {}\nMean of lowpass_waveform: {}".format(lowpass_waveform.min(), lowpass_waveform.max(), lowpass_waveform.mean())) +plot_mel_fbank(mel_filters_librosa, "Mel Filter Bank - librosa") + +mse = torch.square(mel_filters - mel_filters_librosa).mean().item() +print('Mean Square Difference: ', mse) -plt.figure() -plt.plot(lowpass_waveform.t().numpy()) ###################################################################### -# We can also visualize a waveform with the highpass biquad filter. +# MelSpectrogram +# -------------- +# +# Mel-scale spectrogram is a combination of Spectrogram and mel scale +# conversion. In ``torchaudio``, there is a transform ``MelSpectrogram`` +# which is composed of ``Spectrogram`` and ``MelScale``. # -highpass_waveform = torchaudio.functional.highpass_biquad(waveform, sample_rate, cutoff_freq=2000) +waveform, sample_rate = get_speech_sample() + +n_fft = 1024 +win_length = None +hop_length = 512 +n_mels = 128 + +mel_spectrogram = T.MelSpectrogram( + sample_rate=sample_rate, + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + center=True, + pad_mode="reflect", + power=2.0, + norm='slaney', + onesided=True, + n_mels=n_mels, +) -print("Min of highpass_waveform: {}\nMax of highpass_waveform: {}\nMean of highpass_waveform: {}".format(highpass_waveform.min(), highpass_waveform.max(), highpass_waveform.mean())) +melspec = mel_spectrogram(waveform) +plot_spectrogram( + melspec[0], title="MelSpectrogram - torchaudio", ylabel='mel freq') -plt.figure() -plt.plot(highpass_waveform.t().numpy()) ###################################################################### -# Migrating to torchaudio from Kaldi -# ---------------------------------- +# Comparison against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# Users may be familiar with -# `Kaldi `_, a toolkit for speech -# recognition. ``torchaudio`` offers compatibility with it in -# ``torchaudio.kaldi_io``. It can indeed read from kaldi scp, or ark file -# or streams with: +# As a comparison, here is the equivalent way to get Mel-scale spectrogram +# with ``librosa``. # -# - read_vec_int_ark -# - read_vec_flt_scp -# - read_vec_flt_arkfile/stream -# - read_mat_scp -# - read_mat_ark +# **Note** Currently, the result matches only when ``htk=True``. +# ``torchaudio`` does not support the equivalent of ``htk=False`` option. # -# ``torchaudio`` provides Kaldi-compatible transforms for ``spectrogram``, -# ``fbank``, ``mfcc``, and ``resample_waveform with the benefit of GPU support, see -# `here `__ for more information. 
+ +melspec_librosa = librosa.feature.melspectrogram( + waveform.numpy()[0], + sr=sample_rate, + n_fft=n_fft, + hop_length=hop_length, + win_length=win_length, + center=True, + pad_mode="reflect", + power=2.0, + n_mels=n_mels, + norm='slaney', + htk=True, +) +plot_spectrogram( + melspec_librosa, title="MelSpectrogram - librosa", ylabel='mel freq') + +mse = torch.square(melspec - melspec_librosa).mean().item() +print('Mean Square Difference: ', mse) + + +###################################################################### +# MFCC +# ---- # -n_fft = 400.0 -frame_length = n_fft / sample_rate * 1000.0 -frame_shift = frame_length / 2.0 +waveform, sample_rate = get_speech_sample() + +n_fft = 2048 +win_length = None +hop_length = 512 +n_mels = 256 +n_mfcc = 256 -params = { - "channel": 0, - "dither": 0.0, - "window_type": "hanning", - "frame_length": frame_length, - "frame_shift": frame_shift, - "remove_dc_offset": False, - "round_to_power_of_two": False, - "sample_frequency": sample_rate, -} +mfcc_transform = T.MFCC( + sample_rate=sample_rate, + n_mfcc=n_mfcc, melkwargs={'n_fft': n_fft, 'n_mels': n_mels, 'hop_length': hop_length}) -specgram = torchaudio.compliance.kaldi.spectrogram(waveform, **params) +mfcc = mfcc_transform(waveform) -print("Shape of spectrogram: {}".format(specgram.size())) +plot_spectrogram(mfcc[0]) -plt.figure() -plt.imshow(specgram.t().numpy(), cmap='gray') ###################################################################### -# We also support computing the filterbank features from waveforms, -# matching Kaldi’s implementation. +# Comparing against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~ # -fbank = torchaudio.compliance.kaldi.fbank(waveform, **params) +melspec = librosa.feature.melspectrogram( + y=waveform.numpy()[0], sr=sample_rate, n_fft=n_fft, + win_length=win_length, hop_length=hop_length, + n_mels=n_mels, htk=True, norm=None) -print("Shape of fbank: {}".format(fbank.size())) +mfcc_librosa = librosa.feature.mfcc( + S=librosa.core.spectrum.power_to_db(melspec), + n_mfcc=n_mfcc, dct_type=2, norm='ortho') -plt.figure() -plt.imshow(fbank.t().numpy(), cmap='gray') +plot_spectrogram(mfcc_librosa) + +mse = torch.square(mfcc - mfcc_librosa).mean().item() +print('Mean Square Difference: ', mse) ###################################################################### -# You can create mel frequency cepstral coefficients from a raw audio signal -# This matches the input/output of Kaldi’s compute-mfcc-feats. +# Pitch +# ----- +# + +waveform, sample_rate = get_speech_sample() + +pitch = F.detect_pitch_frequency(waveform, sample_rate) +plot_pitch(waveform, sample_rate, pitch) +play_audio(waveform, sample_rate) + + +###################################################################### +# Kaldi Pitch (beta) +# ------------------ +# +# Kaldi Pitch feature [1] is pitch detection mechanism tuned for ASR +# application. This is a beta feature in torchaudio, and only +# ``functional`` form is available. +# +# 1. A pitch extraction algorithm tuned for automatic speech recognition +# +# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. +# Khudanpur +# +# 2014 IEEE International Conference on Acoustics, Speech and Signal +# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi: +# 10.1109/ICASSP.2014.6854049. 
+# [`abstract `__], +# [`paper `__] # -mfcc = torchaudio.compliance.kaldi.mfcc(waveform, **params) +waveform, sample_rate = get_speech_sample(resample=16000) -print("Shape of mfcc: {}".format(mfcc.size())) +pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate) +pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1] -plt.figure() -plt.imshow(mfcc.t().numpy(), cmap='gray') +plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc) +play_audio(waveform, sample_rate) ###################################################################### -# Available Datasets -# ----------------- +# Feature Augmentation +# ==================== # -# If you do not want to create your own dataset to train your model, ``torchaudio`` offers a -# unified dataset interface. This interface supports lazy-loading of files to memory, download -# and extract functions, and datasets to build models. + + +###################################################################### +# SpecAugment +# ----------- +# +# `SpecAugment `__ +# is a popular augmentation technique applied on spectrogram. # -# The datasets ``torchaudio`` currently supports are: -# -# - **VCTK**: Speech data uttered by 109 native speakers of English with various accents -# (`Read more here `_). -# - **Yesno**: Sixty recordings of one individual saying yes or no in Hebrew; each -# recording is eight words long (`Read more here `_). -# - **Common Voice**: An open source, multi-language dataset of voices that anyone can use -# to train speech-enabled applications (`Read more here `_). -# - **LibriSpeech**: Large-scale (1000 hours) corpus of read English speech (`Read more here `_). +# ``torchaudio`` implements ``TimeStrech``, ``TimeMasking`` and +# ``FrequencyMasking``. # -yesno_data = torchaudio.datasets.YESNO('./', download=True) -# A data point in Yesno is a tuple (waveform, sample_rate, labels) where labels is a list of integers with 1 for yes and 0 for no. +###################################################################### +# TimeStrech +# ~~~~~~~~~~ +# + +spec = get_spectrogram(power=None) +strech = T.TimeStretch() + +rate = 1.2 +spec_ = strech(spec, rate) +plot_spectrogram(F.complex_norm(spec_[0]), title=f"Stretched x{rate}", aspect='equal', xmax=304) + +plot_spectrogram(F.complex_norm(spec[0]), title="Original", aspect='equal', xmax=304) + +rate = 0.9 +spec_ = strech(spec, rate) +plot_spectrogram(F.complex_norm(spec_[0]), title=f"Stretched x{rate}", aspect='equal', xmax=304) + + +###################################################################### +# TimeMasking +# ~~~~~~~~~~~ +# + +torch.random.manual_seed(4) -# Pick data point number 3 to see an example of the the yesno_data: -n = 3 -waveform, sample_rate, labels = yesno_data[n] +spec = get_spectrogram() +plot_spectrogram(spec[0], title="Original") -print("Waveform: {}\nSample rate: {}\nLabels: {}".format(waveform, sample_rate, labels)) +masking = T.TimeMasking(time_mask_param=80) +spec = masking(spec) -plt.figure() -plt.plot(waveform.t().numpy()) +plot_spectrogram(spec[0], title="Masked along time axis") ###################################################################### -# Now, whenever you ask for a sound file from the dataset, it is loaded in memory only when you ask for it. -# Meaning, the dataset only loads and keeps in memory the items that you want and use, saving on memory. 
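Because each item is only loaded when it is requested, such datasets combine naturally with ``torch.utils.data.DataLoader``. Audio clips usually differ in length, so batching needs a collate function; the one below is a rough sketch with made-up names, assuming every item is a ``(waveform, sample_rate, labels)`` tuple with a mono waveform, as in ``YESNO``:

import torch
from torch.utils.data import DataLoader

def collate_audio(batch):
    # Zero-pad every clip in the batch to the length of the longest one.
    waveforms, sample_rates, labels = zip(*batch)
    max_len = max(w.shape[1] for w in waveforms)
    padded = torch.zeros(len(waveforms), 1, max_len)
    for i, w in enumerate(waveforms):
        padded[i, :, :w.shape[1]] = w
    return padded, torch.tensor(sample_rates), list(labels)

# Example usage with a dataset instance such as YESNO:
# loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_audio)
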
-# +# FrequencyMasking +# ~~~~~~~~~~~~~~~~ +# + +torch.random.manual_seed(4) + +spec = get_spectrogram() +plot_spectrogram(spec[0], title="Original") + +masking = T.FrequencyMasking(freq_mask_param=80) +spec = masking(spec) + +plot_spectrogram(spec[0], title="Masked along frequency axis") + ###################################################################### -# Conclusion -# ---------- +# Datasets +# ======== +# +# ``torchaudio`` provides easy access to common, publicly accessible +# datasets. Please checkout the official documentation for the list of +# available datasets. # -# We used an example raw audio signal, or waveform, to illustrate how to -# open an audio file using ``torchaudio``, and how to pre-process, -# transform, and apply functions to such waveform. We also demonstrated how -# to use familiar Kaldi functions, as well as utilize built-in datasets to -# construct our models. Given that ``torchaudio`` is built on PyTorch, -# these techniques can be used as building blocks for more advanced audio -# applications, such as speech recognition, while leveraging GPUs. +# Here, we take ``YESNO`` dataset and look into how to use it. # + +YESNO_DOWNLOAD_PROCESS.join() + +dataset = torchaudio.datasets.YESNO(YESNO_DATASET_PATH, download=True) + +for i in [1, 3, 5]: + waveform, sample_rate, label = dataset[i] + plot_specgram(waveform, sample_rate, title=f"Sample {i}: {label}") + play_audio(waveform, sample_rate) diff --git a/requirements.txt b/requirements.txt index 83b64c00064..d55369996a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,8 +23,11 @@ ray[tune] ipython # to run examples +boto3 pandas +requests scikit-image +scipy pillow==8.1.0 wget gym From b5d9d7c14ef7d939017447f967740b7280e898bc Mon Sep 17 00:00:00 2001 From: Guanheng George Zhang <6156351+zhangguanheng66@users.noreply.github.com> Date: Thu, 4 Mar 2021 00:07:18 -0500 Subject: [PATCH 03/13] [1.8 release] Switch to the new datasets in torchtext 0.9.0 release - text classification tutorial (#1352) * switch to the new dataset API * checkpoint * checkpoint * checkpoint * update docs * checkpoint * switch to legacy vocab * update to follow the master API * checkpoint * checkpoint * address reviewer's comments Co-authored-by: Guanheng Zhang Co-authored-by: Brian Johnson --- .../text_sentiment_ngrams_tutorial.py | 503 ++++++++++-------- 1 file changed, 276 insertions(+), 227 deletions(-) diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py index d842a058325..67108e69877 100644 --- a/beginner_source/text_sentiment_ngrams_tutorial.py +++ b/beginner_source/text_sentiment_ngrams_tutorial.py @@ -1,66 +1,133 @@ """ -Text Classification with TorchText +Text classification with the torchtext library ================================== -This tutorial shows how to use the text classification datasets -in ``torchtext``, including +In this tutorial, we will show how to use the torchtext library to build the dataset for the text classification analysis. 
Users will have the flexibility to -:: + - Access to the raw data as an iterator + - Build data processing pipeline to convert the raw text strings into ``torch.Tensor`` that can be used to train the model + - Shuffle and iterate the data with `torch.utils.data.DataLoader `__ +""" - - AG_NEWS, - - SogouNews, - - DBpedia, - - YelpReviewPolarity, - - YelpReviewFull, - - YahooAnswers, - - AmazonReviewPolarity, - - AmazonReviewFull -This example shows how to train a supervised learning algorithm for -classification using one of these ``TextClassification`` datasets. +###################################################################### +# Access to the raw dataset iterators +# ----------------------------------- +# +# The torchtext library provides a few raw dataset iterators, which yield the raw text strings. For example, the ``AG_NEWS`` dataset iterators yield the raw data as a tuple of label and text. -Load data with ngrams ---------------------- +import torch +from torchtext.datasets import AG_NEWS +train_iter = AG_NEWS(split='train') -A bag of ngrams feature is applied to capture some partial information -about the local word order. In practice, bi-gram or tri-gram are applied -to provide more benefits as word groups than only one word. An example: -:: +###################################################################### +# :: +# +# next(train_iter) +# >>> (3, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - +# Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green +# again.") +# +# next(train_iter) +# >>> (3, 'Carlyle Looks Toward Commercial Aerospace (Reuters) Reuters - Private +# investment firm Carlyle Group,\\which has a reputation for making well-timed +# and occasionally\\controversial plays in the defense industry, has quietly +# placed\\its bets on another part of the market.') +# +# next(train_iter) +# >>> (3, "Oil and Economy Cloud Stocks' Outlook (Reuters) Reuters - Soaring +# crude prices plus worries\\about the economy and the outlook for earnings are +# expected to\\hang over the stock market next week during the depth of +# the\\summer doldrums.") +# - "load data with ngrams" - Bi-grams results: "load data", "data with", "with ngrams" - Tri-grams results: "load data with", "data with ngrams" -``TextClassification`` Dataset supports the ngrams method. By setting -ngrams to 2, the example text in the dataset will be a list of single -words plus bi-grams string. +###################################################################### +# Prepare data processing pipelines +# --------------------------------- +# +# We have revisited the very basic components of the torchtext library, including vocab, word vectors, tokenizer. Those are the basic data processing building blocks for raw text string. +# +# Here is an example for typical NLP data processing with tokenizer and vocabulary. The first step is to build a vocabulary with the raw training dataset. Users can have a customized vocab by setting up arguments in the constructor of the Vocab class. For example, the minimum frequency ``min_freq`` for the tokens to be included. 
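As a toy illustration of that argument, here is a rough sketch, with made-up tokens, of how raising ``min_freq`` shrinks the vocabulary; the exact sizes also depend on the special tokens such as ``<unk>`` and ``<pad>`` that ``Vocab`` adds by default:

from collections import Counter
from torchtext.vocab import Vocab

toy_counter = Counter(["hello", "hello", "world", "rare"])

v_all = Vocab(toy_counter, min_freq=1)   # keeps every token seen at least once
v_freq = Vocab(toy_counter, min_freq=2)  # keeps only tokens seen at least twice

print(len(v_all), len(v_freq))  # the second vocabulary is smaller
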
-""" -import torch -import torchtext -from torchtext.datasets import text_classification -NGRAMS = 2 -import os -if not os.path.isdir('./.data'): - os.mkdir('./.data') -train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS']( - root='./.data', ngrams=NGRAMS, vocab=None) -BATCH_SIZE = 16 +from torchtext.data.utils import get_tokenizer +from collections import Counter +from torchtext.vocab import Vocab + +tokenizer = get_tokenizer('basic_english') +train_iter = AG_NEWS(split='train') +counter = Counter() +for (label, line) in train_iter: + counter.update(tokenizer(line)) +vocab = Vocab(counter, min_freq=1) + + +###################################################################### +# The vocabulary block converts a list of tokens into integers. +# +# :: +# +# [vocab[token] for token in ['here', 'is', 'an', 'example']] +# >>> [476, 22, 31, 5298] +# +# Prepare the text processing pipeline with the tokenizer and vocabulary. The text and label pipelines will be used to process the raw data strings from the dataset iterators. + +text_pipeline = lambda x: [vocab[token] for token in tokenizer(x)] +label_pipeline = lambda x: int(x) - 1 + + +###################################################################### +# The text pipeline converts a text string into a list of integers based on the lookup table defined in the vocabulary. The label pipeline converts the label into integers. For example, +# +# :: +# +# text_pipeline('here is the an example') +# >>> [475, 21, 2, 30, 5286] +# label_pipeline('10') +# >>> 9 +# + + + +###################################################################### +# Generate data batch and iterator +# -------------------------------- +# +# `torch.utils.data.DataLoader `__ +# is recommended for PyTorch users (a tutorial is `here `__). +# It works with a map-style dataset that implements the ``getitem()`` and ``len()`` protocols, and represents a map from indices/keys to data samples. It also works with an iterable datasets with the shuffle argumnent of ``False``. +# +# Before sending to the model, ``collate_fn`` function works on a batch of samples generated from ``DataLoader``. The input to ``collate_fn`` is a batch of data with the batch size in ``DataLoader``, and ``collate_fn`` processes them according to the data processing pipelines declared previouly. Pay attention here and make sure that ``collate_fn`` is declared as a top level def. This ensures that the function is available in each worker. +# +# In this example, the text entries in the original data batch input are packed into a list and concatenated as a single tensor for the input of ``nn.EmbeddingBag``. The offset is a tensor of delimiters to represent the beginning index of the individual sequence in the text tensor. Label is a tensor saving the labels of indidividual text entries. 
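To make the offsets convention concrete, here is a tiny hand-made example: two sentences of 3 and 2 token ids are packed into one flat tensor, ``offsets`` marks where each sentence begins, and ``nn.EmbeddingBag`` returns one pooled embedding per sentence:

import torch
from torch import nn

text = torch.tensor([4, 1, 7, 2, 9])  # sentence 0 = [4, 1, 7], sentence 1 = [2, 9]
offsets = torch.tensor([0, 3])        # start index of each sentence in `text`

embedding_bag = nn.EmbeddingBag(num_embeddings=10, embedding_dim=5, mode="mean")
pooled = embedding_bag(text, offsets)
print(pooled.shape)  # torch.Size([2, 5]), one mean embedding per sentence
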
+ + +from torch.utils.data import DataLoader device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +def collate_batch(batch): + label_list, text_list, offsets = [], [], [0] + for (_label, _text) in batch: + label_list.append(label_pipeline(_label)) + processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64) + text_list.append(processed_text) + offsets.append(processed_text.size(0)) + label_list = torch.tensor(label_list, dtype=torch.int64) + offsets = torch.tensor(offsets[:-1]).cumsum(dim=0) + text_list = torch.cat(text_list) + return label_list.to(device), text_list.to(device), offsets.to(device) + +train_iter = AG_NEWS(split='train') +dataloader = DataLoader(train_iter, batch_size=8, shuffle=False, collate_fn=collate_batch) + ###################################################################### # Define the model # ---------------- # -# The model is composed of the -# `EmbeddingBag `__ -# layer and the linear layer (see the figure below). ``nn.EmbeddingBag`` -# computes the mean value of a “bag” of embeddings. The text entries here -# have different lengths. ``nn.EmbeddingBag`` requires no padding here -# since the text lengths are saved in offsets. +# The model is composed of the `nn.EmbeddingBag `__ layer plus a linear layer for the classification purpose. ``nn.EmbeddingBag`` with the default mode of "mean" computes the mean value of a “bag” of embeddings. Although the text entries here have different lengths, nn.EmbeddingBag module requires no padding here since the text lengths are saved in offsets. # # Additionally, since ``nn.EmbeddingBag`` accumulates the average across # the embeddings on the fly, ``nn.EmbeddingBag`` can enhance the @@ -69,11 +136,12 @@ # .. image:: ../_static/img/text_sentiment_ngrams_model.png # -import torch.nn as nn -import torch.nn.functional as F -class TextSentiment(nn.Module): +from torch import nn + +class TextClassificationModel(nn.Module): + def __init__(self, vocab_size, embed_dim, num_class): - super().__init__() + super(TextClassificationModel, self).__init__() self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True) self.fc = nn.Linear(embed_dim, num_class) self.init_weights() @@ -93,8 +161,7 @@ def forward(self, text, offsets): # Initiate an instance # -------------------- # -# The AG_NEWS dataset has four labels and therefore the number of classes -# is four. +# The ``AG_NEWS`` dataset has four labels and therefore the number of classes is four. # # :: # @@ -103,51 +170,14 @@ def forward(self, text, offsets): # 3 : Business # 4 : Sci/Tec # -# The vocab size is equal to the length of vocab (including single word -# and ngrams). The number of classes is equal to the number of labels, -# which is four in AG_NEWS case. +# We build a model with the embedding dimension of 64. The vocab size is equal to the length of the vocabulary instance. The number of classes is equal to the number of labels, # -VOCAB_SIZE = len(train_dataset.get_vocab()) -EMBED_DIM = 32 -NUN_CLASS = len(train_dataset.get_labels()) -model = TextSentiment(VOCAB_SIZE, EMBED_DIM, NUN_CLASS).to(device) - - -###################################################################### -# Functions used to generate batch -# -------------------------------- -# - - -###################################################################### -# Since the text entries have different lengths, a custom function -# generate_batch() is used to generate data batches and offsets. The -# function is passed to ``collate_fn`` in ``torch.utils.data.DataLoader``. 
-# The input to ``collate_fn`` is a list of tensors with the size of -# batch_size, and the ``collate_fn`` function packs them into a -# mini-batch. Pay attention here and make sure that ``collate_fn`` is -# declared as a top level def. This ensures that the function is available -# in each worker. -# -# The text entries in the original data batch input are packed into a list -# and concatenated as a single tensor as the input of ``nn.EmbeddingBag``. -# The offsets is a tensor of delimiters to represent the beginning index -# of the individual sequence in the text tensor. Label is a tensor saving -# the labels of individual text entries. -# - -def generate_batch(batch): - label = torch.tensor([entry[0] for entry in batch]) - text = [entry[1] for entry in batch] - offsets = [0] + [len(entry) for entry in text] - # torch.Tensor.cumsum returns the cumulative sum - # of elements in the dimension dim. - # torch.Tensor([1.0, 2.0, 3.0]).cumsum(dim=0) - - offsets = torch.tensor(offsets[:-1]).cumsum(dim=0) - text = torch.cat(text) - return text, offsets, label +train_iter = AG_NEWS(split='train') +num_class = len(set([label for (label, text) in train_iter])) +vocab_size = len(vocab) +emsize = 64 +model = TextClassificationModel(vocab_size, emsize, num_class).to(device) ###################################################################### @@ -156,144 +186,170 @@ def generate_batch(batch): # -###################################################################### -# `torch.utils.data.DataLoader `__ -# is recommended for PyTorch users, and it makes data loading in parallel -# easily (a tutorial is -# `here `__). -# We use ``DataLoader`` here to load AG_NEWS datasets and send it to the -# model for training/validation. -# - -from torch.utils.data import DataLoader +import time -def train_func(sub_train_): +def train(dataloader): + model.train() + total_acc, total_count = 0, 0 + log_interval = 500 + start_time = time.time() - # Train the model - train_loss = 0 - train_acc = 0 - data = DataLoader(sub_train_, batch_size=BATCH_SIZE, shuffle=True, - collate_fn=generate_batch) - for i, (text, offsets, cls) in enumerate(data): + for idx, (label, text, offsets) in enumerate(dataloader): optimizer.zero_grad() - text, offsets, cls = text.to(device), offsets.to(device), cls.to(device) - output = model(text, offsets) - loss = criterion(output, cls) - train_loss += loss.item() + predited_label = model(text, offsets) + loss = criterion(predited_label, label) loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() - train_acc += (output.argmax(1) == cls).sum().item() - - # Adjust the learning rate - scheduler.step() + total_acc += (predited_label.argmax(1) == label).sum().item() + total_count += label.size(0) + if idx % log_interval == 0 and idx > 0: + elapsed = time.time() - start_time + print('| epoch {:3d} | {:5d}/{:5d} batches ' + '| accuracy {:8.3f}'.format(epoch, idx, len(dataloader), + total_acc/total_count)) + total_acc, total_count = 0, 0 + start_time = time.time() + +def evaluate(dataloader): + model.eval() + total_acc, total_count = 0, 0 - return train_loss / len(sub_train_), train_acc / len(sub_train_) - -def test(data_): - loss = 0 - acc = 0 - data = DataLoader(data_, batch_size=BATCH_SIZE, collate_fn=generate_batch) - for text, offsets, cls in data: - text, offsets, cls = text.to(device), offsets.to(device), cls.to(device) - with torch.no_grad(): - output = model(text, offsets) - loss = criterion(output, cls) - loss += loss.item() - acc += (output.argmax(1) == 
cls).sum().item() - - return loss / len(data_), acc / len(data_) + with torch.no_grad(): + for idx, (label, text, offsets) in enumerate(dataloader): + predited_label = model(text, offsets) + loss = criterion(predited_label, label) + total_acc += (predited_label.argmax(1) == label).sum().item() + total_count += label.size(0) + return total_acc/total_count ###################################################################### # Split the dataset and run the model # ----------------------------------- # -# Since the original AG_NEWS has no valid dataset, we split the training +# Since the original ``AG_NEWS`` has no valid dataset, we split the training # dataset into train/valid sets with a split ratio of 0.95 (train) and # 0.05 (valid). Here we use # `torch.utils.data.dataset.random_split `__ # function in PyTorch core library. # # `CrossEntropyLoss `__ -# criterion combines nn.LogSoftmax() and nn.NLLLoss() in a single class. +# criterion combines ``nn.LogSoftmax()`` and ``nn.NLLLoss()`` in a single class. # It is useful when training a classification problem with C classes. # `SGD `__ -# implements stochastic gradient descent method as optimizer. The initial -# learning rate is set to 4.0. +# implements stochastic gradient descent method as the optimizer. The initial +# learning rate is set to 5.0. # `StepLR `__ # is used here to adjust the learning rate through epochs. # -import time -from torch.utils.data.dataset import random_split -N_EPOCHS = 5 -min_valid_loss = float('inf') - -criterion = torch.nn.CrossEntropyLoss().to(device) -optimizer = torch.optim.SGD(model.parameters(), lr=4.0) -scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9) - -train_len = int(len(train_dataset) * 0.95) -sub_train_, sub_valid_ = \ - random_split(train_dataset, [train_len, len(train_dataset) - train_len]) - -for epoch in range(N_EPOCHS): - - start_time = time.time() - train_loss, train_acc = train_func(sub_train_) - valid_loss, valid_acc = test(sub_valid_) - secs = int(time.time() - start_time) - mins = secs / 60 - secs = secs % 60 - - print('Epoch: %d' %(epoch + 1), " | time in %d minutes, %d seconds" %(mins, secs)) - print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)') - print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)') +from torch.utils.data.dataset import random_split +# Hyperparameters +EPOCHS = 10 # epoch +LR = 5 # learning rate +BATCH_SIZE = 64 # batch size for training + +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=LR) +scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1) +total_accu = None +train_iter, test_iter = AG_NEWS() +train_dataset = list(train_iter) +test_dataset = list(test_iter) +num_train = int(len(train_dataset) * 0.95) +split_train_, split_valid_ = \ + random_split(train_dataset, [num_train, len(train_dataset) - num_train]) + +train_dataloader = DataLoader(split_train_, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) +valid_dataloader = DataLoader(split_valid_, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) +test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) + +for epoch in range(1, EPOCHS + 1): + epoch_start_time = time.time() + train(train_dataloader) + accu_val = evaluate(valid_dataloader) + if total_accu is not None and total_accu > accu_val: + scheduler.step() + else: + total_accu = accu_val + print('-' * 59) + print('| end of epoch {:3d} | time: 
{:5.2f}s | ' + 'valid accuracy {:8.3f} '.format(epoch, + time.time() - epoch_start_time, + accu_val)) + print('-' * 59) ###################################################################### -# Running the model on GPU with the following information: -# -# Epoch: 1 \| time in 0 minutes, 11 seconds +# Running the model on GPU with the following printout: # # :: # -# Loss: 0.0263(train) | Acc: 84.5%(train) -# Loss: 0.0001(valid) | Acc: 89.0%(valid) -# -# -# Epoch: 2 \| time in 0 minutes, 10 seconds -# -# :: -# -# Loss: 0.0119(train) | Acc: 93.6%(train) -# Loss: 0.0000(valid) | Acc: 89.6%(valid) -# -# -# Epoch: 3 \| time in 0 minutes, 9 seconds -# -# :: -# -# Loss: 0.0069(train) | Acc: 96.4%(train) -# Loss: 0.0000(valid) | Acc: 90.5%(valid) -# -# -# Epoch: 4 \| time in 0 minutes, 11 seconds -# -# :: -# -# Loss: 0.0038(train) | Acc: 98.2%(train) -# Loss: 0.0000(valid) | Acc: 90.4%(valid) -# -# -# Epoch: 5 \| time in 0 minutes, 11 seconds -# -# :: -# -# Loss: 0.0022(train) | Acc: 99.0%(train) -# Loss: 0.0000(valid) | Acc: 91.0%(valid) -# +# | epoch 1 | 500/ 1782 batches | accuracy 0.684 +# | epoch 1 | 1000/ 1782 batches | accuracy 0.852 +# | epoch 1 | 1500/ 1782 batches | accuracy 0.877 +# ----------------------------------------------------------- +# | end of epoch 1 | time: 8.33s | valid accuracy 0.867 +# ----------------------------------------------------------- +# | epoch 2 | 500/ 1782 batches | accuracy 0.895 +# | epoch 2 | 1000/ 1782 batches | accuracy 0.900 +# | epoch 2 | 1500/ 1782 batches | accuracy 0.903 +# ----------------------------------------------------------- +# | end of epoch 2 | time: 8.18s | valid accuracy 0.890 +# ----------------------------------------------------------- +# | epoch 3 | 500/ 1782 batches | accuracy 0.914 +# | epoch 3 | 1000/ 1782 batches | accuracy 0.914 +# | epoch 3 | 1500/ 1782 batches | accuracy 0.916 +# ----------------------------------------------------------- +# | end of epoch 3 | time: 8.20s | valid accuracy 0.897 +# ----------------------------------------------------------- +# | epoch 4 | 500/ 1782 batches | accuracy 0.926 +# | epoch 4 | 1000/ 1782 batches | accuracy 0.924 +# | epoch 4 | 1500/ 1782 batches | accuracy 0.921 +# ----------------------------------------------------------- +# | end of epoch 4 | time: 8.18s | valid accuracy 0.895 +# ----------------------------------------------------------- +# | epoch 5 | 500/ 1782 batches | accuracy 0.938 +# | epoch 5 | 1000/ 1782 batches | accuracy 0.935 +# | epoch 5 | 1500/ 1782 batches | accuracy 0.937 +# ----------------------------------------------------------- +# | end of epoch 5 | time: 8.16s | valid accuracy 0.902 +# ----------------------------------------------------------- +# | epoch 6 | 500/ 1782 batches | accuracy 0.939 +# | epoch 6 | 1000/ 1782 batches | accuracy 0.939 +# | epoch 6 | 1500/ 1782 batches | accuracy 0.938 +# ----------------------------------------------------------- +# | end of epoch 6 | time: 8.16s | valid accuracy 0.906 +# ----------------------------------------------------------- +# | epoch 7 | 500/ 1782 batches | accuracy 0.941 +# | epoch 7 | 1000/ 1782 batches | accuracy 0.939 +# | epoch 7 | 1500/ 1782 batches | accuracy 0.939 +# ----------------------------------------------------------- +# | end of epoch 7 | time: 8.19s | valid accuracy 0.903 +# ----------------------------------------------------------- +# | epoch 8 | 500/ 1782 batches | accuracy 0.942 +# | epoch 8 | 1000/ 1782 batches | accuracy 0.941 +# | epoch 8 | 1500/ 1782 batches | accuracy 0.942 +# 
----------------------------------------------------------- +# | end of epoch 8 | time: 8.16s | valid accuracy 0.904 +# ----------------------------------------------------------- +# | epoch 9 | 500/ 1782 batches | accuracy 0.942 +# | epoch 9 | 1000/ 1782 batches | accuracy 0.941 +# | epoch 9 | 1500/ 1782 batches | accuracy 0.942 +# ----------------------------------------------------------- +# end of epoch 9 | time: 8.16s | valid accuracy 0.904 +# ----------------------------------------------------------- +# | epoch 10 | 500/ 1782 batches | accuracy 0.940 +# | epoch 10 | 1000/ 1782 batches | accuracy 0.942 +# | epoch 10 | 1500/ 1782 batches | accuracy 0.942 +# ----------------------------------------------------------- +# | end of epoch 10 | time: 8.15s | valid accuracy 0.904 +# ----------------------------------------------------------- ###################################################################### @@ -301,17 +357,20 @@ def test(data_): # ------------------------------------ # -print('Checking the results of test dataset...') -test_loss, test_acc = test(test_dataset) -print(f'\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)') ###################################################################### -# Checking the results of test dataset… +# Checking the results of the test dataset… + +print('Checking the results of test dataset.') +accu_test = evaluate(test_dataloader) +print('test accuracy {:8.3f}'.format(accu_test)) + +################################################ # # :: # -# Loss: 0.0237(test) | Acc: 90.5%(test) +# test accuracy 0.906 # @@ -319,25 +378,18 @@ def test(data_): # Test on a random news # --------------------- # -# Use the best model so far and test a golf news. The label information is -# available -# `here `__. +# Use the best model so far and test a golf news. # -import re -from torchtext.data.utils import ngrams_iterator -from torchtext.data.utils import get_tokenizer -ag_news_label = {1 : "World", - 2 : "Sports", - 3 : "Business", - 4 : "Sci/Tec"} +ag_news_label = {1: "World", + 2: "Sports", + 3: "Business", + 4: "Sci/Tec"} -def predict(text, model, vocab, ngrams): - tokenizer = get_tokenizer("basic_english") +def predict(text, text_pipeline): with torch.no_grad(): - text = torch.tensor([vocab[token] - for token in ngrams_iterator(tokenizer(text), ngrams)]) + text = torch.tensor(text_pipeline(text)) output = model(text, torch.tensor([0])) return output.argmax(1).item() + 1 @@ -353,17 +405,14 @@ def predict(text, model, vocab, ngrams): was even more impressive considering he’d never played the \ front nine at TPC Southwind." -vocab = train_dataset.get_vocab() model = model.to("cpu") -print("This is a %s news" %ag_news_label[predict(ex_text_str, model, vocab, 2)]) - -###################################################################### -# This is a Sports news -# +print("This is a %s news" %ag_news_label[predict(ex_text_str, text_pipeline)]) -###################################################################### -# You can find the code examples displayed in this note -# `here `__. 
+################################################ +# +# :: +# +# This is a Sports news # From 68ca41de70a3842b601c19b5aed9eb61718bfa75 Mon Sep 17 00:00:00 2001 From: Guanheng George Zhang <6156351+zhangguanheng66@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:32:46 -0500 Subject: [PATCH 04/13] [1.8 release] Switch to LM dataset in torchtext 0.9.0 release (#1349) * switch to raw text dataset in torchtext 0.9.0 release * follow the new API in torchtext master Co-authored-by: Guanheng Zhang Co-authored-by: Brian Johnson --- beginner_source/transformer_tutorial.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 0c5a484c457..78fbb68ccaa 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -145,25 +145,27 @@ def forward(self, x): import io import torch -from torchtext.utils import download_from_url, extract_archive +from torchtext.datasets import WikiText2 from torchtext.data.utils import get_tokenizer -from torchtext.vocab import build_vocab_from_iterator +from collections import Counter +from torchtext.vocab import Vocab -url = 'https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip' -test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url)) +train_iter = WikiText2(split='train') tokenizer = get_tokenizer('basic_english') -vocab = build_vocab_from_iterator(map(tokenizer, - iter(io.open(train_filepath, - encoding="utf8")))) +counter = Counter() +for line in train_iter: + counter.update(tokenizer(line)) +vocab = Vocab(counter) def data_process(raw_text_iter): data = [torch.tensor([vocab[token] for token in tokenizer(item)], dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) -train_data = data_process(iter(io.open(train_filepath, encoding="utf8"))) -val_data = data_process(iter(io.open(valid_filepath, encoding="utf8"))) -test_data = data_process(iter(io.open(test_filepath, encoding="utf8"))) +train_iter, val_iter, test_iter = WikiText2() +train_data = data_process(train_iter) +val_data = data_process(val_iter) +test_data = data_process(test_iter) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") From 63bfc84a84a8a9158c1d6151568e0d7dc7e2e5da Mon Sep 17 00:00:00 2001 From: James Reed Date: Thu, 4 Mar 2021 06:41:29 -0800 Subject: [PATCH 05/13] [WIP][FX] CPU Performance Profiling with FX (#1319) Co-authored-by: Brian Johnson --- index.rst | 17 ++ intermediate_source/fx_profiling_tutorial.py | 236 +++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 intermediate_source/fx_profiling_tutorial.py diff --git a/index.rst b/index.rst index ad9bea0ca84..813eb6c5b4a 100644 --- a/index.rst +++ b/index.rst @@ -215,6 +215,15 @@ Welcome to PyTorch Tutorials :link: advanced/super_resolution_with_onnxruntime.html :tags: Production +.. Code Transformations with FX + +.. customcarditem:: + :header: Building a Simple Performance Profiler with FX + :card_description: Build a simple FX interpreter to record the runtime of op, module, and function calls and report statistics + :image: _static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png + :link: intermediate/fx_profiling_tutorial.html + :tags: FX + .. Frontend APIs .. customcarditem:: @@ -537,6 +546,14 @@ Additional Resources advanced/cpp_export advanced/super_resolution_with_onnxruntime +.. 
toctree:: + :maxdepth: 2 + :includehidden: + :hidden: + :caption: Code Transforms with FX + + intermediate/fx_profiling_tutorial + .. toctree:: :maxdepth: 2 :includehidden: diff --git a/intermediate_source/fx_profiling_tutorial.py b/intermediate_source/fx_profiling_tutorial.py new file mode 100644 index 00000000000..d54f3ccb61e --- /dev/null +++ b/intermediate_source/fx_profiling_tutorial.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +""" +(beta) Building a Simple CPU Performance Profiler with FX +******************************************************* +**Author**: `James Reed `_ + +In this tutorial, we are going to use FX to do the following: + +1) Capture PyTorch Python code in a way that we can inspect and gather + statistics about the structure and execution of the code +2) Build out a small class that will serve as a simple performance "profiler", + collecting runtime statistics about each part of the model from actual + runs. + +""" + +###################################################################### +# For this tutorial, we are going to use the torchvision ResNet18 model +# for demonstration purposes. + +import torch +import torch.fx +import torchvision.models as models + +rn18 = models.resnet18() +rn18.eval() + +###################################################################### +# Now that we have our model, we want to inspect deeper into its +# performance. That is, for the following invocation, which parts +# of the model are taking the longest? +input = torch.randn(5, 3, 224, 224) +output = rn18(input) + +###################################################################### +# A common way of answering that question is to go through the program +# source, add code that collects timestamps at various points in the +# program, and compare the difference between those timestamps to see +# how long the regions between the timestamps take. +# +# That technique is certainly applicable to PyTorch code, however it +# would be nicer if we didn't have to copy over model code and edit it, +# especially code we haven't written (like this torchvision model). +# Instead, we are going to use FX to automate this "instrumentation" +# process without needing to modify any source. + +###################################################################### +# First, let's get some imports out of the way (we will be using all +# of these later in the code). + +import statistics, tabulate, time +from typing import Any, Dict, List +from torch.fx import Interpreter + +###################################################################### +# .. note:: +# ``tabulate`` is an external library that is not a dependency of PyTorch. +# We will be using it to more easily visualize performance data. Please +# make sure you've installed it from your favorite Python package source. + +###################################################################### +# Capturing the Model with Symbolic Tracing +# ----------------------------------------- +# Next, we are going to use FX's symbolic tracing mechanism to capture +# the definition of our model in a data structure we can manipulate +# and examine. + +traced_rn18 = torch.fx.symbolic_trace(rn18) +print(traced_rn18.graph) + +###################################################################### +# This gives us a Graph representation of the ResNet18 model. A Graph +# consists of a series of Nodes connected to each other. 
Each Node +# represents a call-site in the Python code (whether to a function, +# a module, or a method) and the edges (represented as ``args`` and ``kwargs`` +# on each node) represent the values passed between these call-sites. More +# information about the Graph representation and the rest of FX's APIs ca +# be found at the FX documentation https://pytorch.org/docs/master/fx.html. + + +###################################################################### +# Creating a Profiling Interpreter +# -------------------------------- +# Next, we are going to create a class that inherits from ``torch.fx.Interpreter``. +# Though the ``GraphModule`` that ``symbolic_trace`` produces compiles Python code +# that is run when you call a ``GraphModule``, an alternative way to run a +# ``GraphModule`` is by executing each ``Node`` in the ``Graph`` one by one. That is +# the functionality that ``Interpreter`` provides: It interprets the graph node- +# by-node. +# +# By inheriting from ``Interpreter``, we can override various functionality and +# install the profiling behavior we want. The goal is to have an object to which +# we can pass a model, invoke the model 1 or more times, then get statistics about +# how long the model and each part of the model took during those runs. +# +# Let's define our ``ProfilingInterpreter`` class: + +class ProfilingInterpreter(Interpreter): + def __init__(self, mod : torch.nn.Module): + # Rather than have the user symbolically trace their model, + # we're going to do it in the constructor. As a result, the + # user can pass in any ``Module`` without having to worry about + # symbolic tracing APIs + gm = torch.fx.symbolic_trace(mod) + super().__init__(gm) + + # We are going to store away two things here: + # + # 1. A list of total runtimes for ``mod``. In other words, we are + # storing away the time ``mod(...)`` took each time this + # interpreter is called. + self.total_runtime_sec : List[float] = [] + # 2. A map from ``Node`` to a list of times (in seconds) that + # node took to run. This can be seen as similar to (1) but + # for specific sub-parts of the model. + self.runtimes_sec : Dict[torch.fx.Node, List[float]] = {} + + ###################################################################### + # Next, let's override our first method: ``run()``. ``Interpreter``'s ``run`` + # method is the top-level entrypoint for execution of the model. We will + # want to intercept this so that we can record the total runtime of the + # model. + + def run(self, *args) -> Any: + # Record the time we started running the model + t_start = time.time() + # Run the model by delegating back into Interpreter.run() + return_val = super().run(*args) + # Record the time we finished running the model + t_end = time.time() + # Store the total elapsed time this model execution took in the + # ProfilingInterpreter + self.total_runtime_sec.append(t_end - t_start) + return return_val + + ###################################################################### + # Now, let's override ``run_node``. ``Interpreter`` calls ``run_node`` each + # time it executes a single node. We will intercept this so that we + # can measure and record the time taken for each individual call in + # the model. 
+ + def run_node(self, n : torch.fx.Node) -> Any: + # Record the time we started running the op + t_start = time.time() + # Run the op by delegating back into Interpreter.run_node() + return_val = super().run_node(n) + # Record the time we finished running the op + t_end = time.time() + # If we don't have an entry for this node in our runtimes_sec + # data structure, add one with an empty list value. + self.runtimes_sec.setdefault(n, []) + # Record the total elapsed time for this single invocation + # in the runtimes_sec data structure + self.runtimes_sec[n].append(t_end - t_start) + return return_val + + ###################################################################### + # Finally, we are going to define a method (one which doesn't override + # any ``Interpreter`` method) that provides us a nice, organized view of + # the data we have collected. + + def summary(self, should_sort : bool = False) -> str: + # Build up a list of summary information for each node + node_summaries : List[List[Any]] = [] + # Calculate the mean runtime for the whole network. Because the + # network may have been called multiple times during profiling, + # we need to summarize the runtimes. We choose to use the + # arithmetic mean for this. + mean_total_runtime = statistics.mean(self.total_runtime_sec) + + # For each node, record summary statistics + for node, runtimes in self.runtimes_sec.items(): + # Similarly, compute the mean runtime for ``node`` + mean_runtime = statistics.mean(runtimes) + # For easier understanding, we also compute the percentage + # time each node took with respect to the whole network. + pct_total = mean_runtime / mean_total_runtime * 100 + # Record the node's type, name of the node, mean runtime, and + # percent runtim + node_summaries.append( + [node.op, str(node), mean_runtime, pct_total]) + + # One of the most important questions to answer when doing performance + # profiling is "Which op(s) took the longest?". We can make this easy + # to see by providing sorting functionality in our summary view + if should_sort: + node_summaries.sort(key=lambda s: s[2], reverse=True) + + # Use the ``tabulate`` library to create a well-formatted table + # presenting our summary information + headers : List[str] = [ + 'Op type', 'Op', 'Average runtime (s)', 'Pct total runtime' + ] + return tabulate.tabulate(node_summaries, headers=headers) + +###################################################################### +# .. note:: +# We use Python's ``time.time`` function to pull wall clock +# timestamps and compare them. This is not the most accurate +# way to measure performance, and will only give us a first- +# order approximation. We use this simple technique only for the +# purpose of demonstration in this tutorial. + +###################################################################### +# Investigating the Performance of ResNet18 +# ----------------------------------------- +# We can now use ``ProfilingInterpreter`` to inspect the performance +# characteristics of our ResNet18 model; + +interp = ProfilingInterpreter(rn18) +interp.run(input) +print(interp.summary(True)) + +###################################################################### +# There are two things we should call out here: +# +# * MaxPool2d takes up the most time. This is a known issue: +# https://github.com/pytorch/pytorch/issues/51393 +# * BatchNorm2d also takes up significant time. 
We can continue this +# line of thinking and optimize this in the Conv-BN Fusion with FX +# tutorial TODO: link +# +# +# Conclusion +# ---------- +# As we can see, using FX we can easily capture PyTorch programs (even +# ones we don't have the source code for!) in a machine-interpretable +# format and use that for analysis, such as the performance analysis +# we've done here. FX opens up an exiciting world of possibilities for +# working with PyTorch programs. +# +# Finally, since FX is still in beta, we would be happy to hear any +# feedback you have about using it. Please feel free to use the +# PyTorch Forums (https://discuss.pytorch.org/) and the issue tracker +# (https://github.com/pytorch/pytorch/issues) to provide any feedback +# you might have. From 5bda6b07d2fd2bc20062157c98f95ed0da8c418b Mon Sep 17 00:00:00 2001 From: Horace He Date: Thu, 4 Mar 2021 06:45:37 -0800 Subject: [PATCH 06/13] [FX] Added fuser tutorial (#1356) * Added fuser tutorial * updated index.rst * fixed conclusion * responded to some comments * responded to comments * respond Co-authored-by: Brian Johnson --- index.rst | 8 +- intermediate_source/fx_conv_bn_fuser.py | 262 ++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 intermediate_source/fx_conv_bn_fuser.py diff --git a/index.rst b/index.rst index 813eb6c5b4a..c2637e5ce9a 100644 --- a/index.rst +++ b/index.rst @@ -217,6 +217,13 @@ Welcome to PyTorch Tutorials .. Code Transformations with FX +.. customcarditem:: + :header: Building a Convolution/Batch Norm fuser in FX + :card_description: Build a simple FX pass that fuses batch norm into convolution to improve performance during inference. + :image: _static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png + :link: intermediate/fx_conv_bn_fuser.html + :tags: FX + .. customcarditem:: :header: Building a Simple Performance Profiler with FX :card_description: Build a simple FX interpreter to record the runtime of op, module, and function calls and report statistics @@ -614,4 +621,3 @@ Additional Resources beginner/deeplabv3_on_ios beginner/deeplabv3_on_android - diff --git a/intermediate_source/fx_conv_bn_fuser.py b/intermediate_source/fx_conv_bn_fuser.py new file mode 100644 index 00000000000..93b89c08fec --- /dev/null +++ b/intermediate_source/fx_conv_bn_fuser.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +""" +(beta) Building a Convolution/Batch Norm fuser in FX +******************************************************* +**Author**: `Horace He `_ + +In this tutorial, we are going to use FX, a toolkit for composable function +transformations of PyTorch, to do the following: + +1) Find patterns of conv/batch norm in the data dependencies. +2) For the patterns found in 1), fold the batch norm statistics into the convolution weights. + +Note that this optimization only works for models in inference mode (i.e. `mode.eval()`) + +We will be building the fuser that exists here: +https://github.com/pytorch/pytorch/blob/orig/release/1.8/torch/fx/experimental/fuser.py + +""" + + +###################################################################### +# First, let's get some imports out of the way (we will be using all +# of these later in the code). + +from typing import Type, Dict, Any, Tuple, Iterable +import copy +import torch.fx as fx +import torch +import torch.nn as nn + +###################################################################### +# For this tutorial, we are going to create a model consisting of convolutions +# and batch norms. 
Note that this model has some tricky components - some of
+# the conv/batch norm patterns are hidden within Sequentials and one of the
+# BatchNorms is wrapped in another Module.
+
+class WrappedBatchNorm(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.mod = nn.BatchNorm2d(1)
+    def forward(self, x):
+        return self.mod(x)
+
+class M(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 1, 1)
+        self.bn1 = nn.BatchNorm2d(1)
+        self.conv2 = nn.Conv2d(1, 1, 1)
+        self.nested = nn.Sequential(
+            nn.BatchNorm2d(1),
+            nn.Conv2d(1, 1, 1),
+        )
+        self.wrapped = WrappedBatchNorm()
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.conv2(x)
+        x = self.nested(x)
+        x = self.wrapped(x)
+        return x
+
+model = M()
+
+model.eval()
+
+######################################################################
+# Fusing Convolution with Batch Norm
+# -----------------------------------------
+# One of the primary challenges with trying to automatically fuse convolution
+# and batch norm in PyTorch is that PyTorch does not provide an easy way of
+# accessing the computational graph. FX resolves this problem by symbolically
+# tracing the actual operations called, so that we can track the computations
+# through the `forward` call, nested within Sequential modules, or wrapped in
+# a user-defined module.
+
+traced_model = torch.fx.symbolic_trace(model)
+print(traced_model.graph)
+
+######################################################################
+# This gives us a graph representation of our model. Note that both the modules
+# hidden within the sequential as well as the wrapped Module have been inlined
+# into the graph. This is the default level of abstraction, but it can be
+# configured by the pass writer. More information can be found at the FX
+# overview https://pytorch.org/docs/master/fx.html#module-torch.fx
+
+
+####################################
+# Fusing Convolution with Batch Norm
+# ----------------------------------
+# Unlike some other fusions, fusion of convolution with batch norm does not
+# require any new operators. Instead, as batch norm during inference
+# consists of a pointwise add and multiply, these operations can be "baked"
+# into the preceding convolution's weights. This allows us to remove the batch
+# norm entirely from our model! Read
+# https://nenadmarkus.com/p/fusing-batchnorm-and-conv/ for further details. The
+# code here is copied from
+# https://github.com/pytorch/pytorch/blob/orig/release/1.8/torch/nn/utils/fusion.py
+# for clarity purposes.
+def fuse_conv_bn_eval(conv, bn):
+    """
+    Given a conv Module `A` and a batch_norm module `B`, returns a conv
+    module `C` such that C(x) == B(A(x)) in inference mode.
+    """
+    assert(not (conv.training or bn.training)), "Fusion only for eval!"
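+
+    # fuse_conv_bn_weights (defined below) folds the batch norm into the conv:
+    # with running statistics mu / var, affine parameters gamma / beta and the
+    # numerical-stability term eps, the fused convolution uses
+    #
+    #   W_fused = W * gamma / sqrt(var + eps)
+    #   b_fused = (b - mu) * gamma / sqrt(var + eps) + beta
+    #
+    # which is why C(x) == B(A(x)) at inference time.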
+ fused_conv = copy.deepcopy(conv) + + fused_conv.weight, fused_conv.bias = \ + fuse_conv_bn_weights(fused_conv.weight, fused_conv.bias, + bn.running_mean, bn.running_var, bn.eps, bn.weight, bn.bias) + + return fused_conv + +def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): + if conv_b is None: + conv_b = torch.zeros_like(bn_rm) + if bn_w is None: + bn_w = torch.ones_like(bn_rm) + if bn_b is None: + bn_b = torch.zeros_like(bn_rm) + bn_var_rsqrt = torch.rsqrt(bn_rv + bn_eps) + + conv_w = conv_w * (bn_w * bn_var_rsqrt).reshape([-1] + [1] * (len(conv_w.shape) - 1)) + conv_b = (conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b + + return torch.nn.Parameter(conv_w), torch.nn.Parameter(conv_b) + + +#################################### +# FX Fusion Pass +# ---------------------------------- +# Now that we have our computational graph as well as a method for fusing +# convolution and batch norm, all that remains is to iterate over the FX graph +# and apply the desired fusions. + + +def _parent_name(target : str) -> Tuple[str, str]: + """ + Splits a qualname into parent path and last atom. + For example, `foo.bar.baz` -> (`foo.bar`, `baz`) + """ + *parent, name = target.rsplit('.', 1) + return parent[0] if parent else '', name + +def replace_node_module(node: fx.Node, modules: Dict[str, Any], new_module: torch.nn.Module): + assert(isinstance(node.target, str)) + parent_name, name = _parent_name(node.target) + setattr(modules[parent_name], name, new_module) + + +def fuse(model: torch.nn.Module) -> torch.nn.Module: + model = copy.deepcopy(model) + # The first step of most FX passes is to symbolically trace our model to + # obtain a `GraphModule`. This is a representation of our original model + # that is functionally identical to our original model, except that we now + # also have a graph representation of our forward pass. + fx_model: fx.GraphModule = fx.symbolic_trace(model) + modules = dict(fx_model.named_modules()) + + # The primary representation for working with FX are the `Graph` and the + # `Node`. Each `GraphModule` has a `Graph` associated with it - this + # `Graph` is also what generates `GraphModule.code`. + # The `Graph` itself is represented as a list of `Node` objects. Thus, to + # iterate through all of the operations in our graph, we iterate over each + # `Node` in our `Graph`. + for node in fx_model.graph.nodes: + # The FX IR contains several types of nodes, which generally represent + # call sites to modules, functions, or methods. The type of node is + # determined by `Node.op`. + if node.op != 'call_module': # If our current node isn't calling a Module then we can ignore it. + continue + # For call sites, `Node.target` represents the module/function/method + # that's being called. Here, we check `Node.target` to see if it's a + # batch norm module, and then check `Node.args[0].target` to see if the + # input `Node` is a convolution. + if type(modules[node.target]) is nn.BatchNorm2d and type(modules[node.args[0].target]) is nn.Conv2d: + if len(node.args[0].users) > 1: # Output of conv is used by other nodes + continue + conv = modules[node.args[0].target] + bn = modules[node.target] + fused_conv = fuse_conv_bn_eval(conv, bn) + replace_node_module(node.args[0], modules, fused_conv) + # As we've folded the batch nor into the conv, we need to replace all uses + # of the batch norm with the conv. + node.replace_all_uses_with(node.args[0]) + # Now that all uses of the batch norm have been replaced, we can + # safely remove the batch norm. 
+ fx_model.graph.erase_node(node) + fx_model.graph.lint() + # After we've modified our graph, we need to recompile our graph in order + # to keep the generated code in sync. + fx_model.recompile() + return fx_model + + +###################################################################### +# .. note:: +# We make some simplifications here for demonstration purposes, such as only +# matching 2D convolutions. View +# https://github.com/pytorch/pytorch/blob/master/torch/fx/experimental/fuser.py +# for a more usable pass. + +###################################################################### +# Testing out our Fusion Pass +# ----------------------------------------- +# We can now run this fusion pass on our initial toy model and verify that our +# results are identical. In addition, we can print out the code for our fused +# model and verify that there are no more batch norms. + + +fused_model = fuse(model) +print(fused_model.code) +inp = torch.randn(5, 1, 1, 1) +torch.testing.assert_allclose(fused_model(inp), model(inp)) + + +###################################################################### +# Benchmarking our Fusion on ResNet18 +# ---------- +# We can test our fusion pass on a larger model like ResNet18 and see how much +# this pass improves inference performance. +import torchvision.models as models +import time + +rn18 = models.resnet18() +rn18.eval() + +inp = torch.randn(10, 3, 224, 224) +output = rn18(inp) + +def benchmark(model, iters=20): + for _ in range(10): + model(inp) + begin = time.time() + for _ in range(iters): + model(inp) + return str(time.time()-begin) + +fused_rn18 = fuse(rn18) +print("Unfused time: ", benchmark(rn18)) +print("Fused time: ", benchmark(fused_rn18)) +###################################################################### +# As we previously saw, the output of our FX transformation is +# (Torchscriptable) PyTorch code, we can easily `jit.script` the output to try +# and increase our performance even more. In this way, our FX model +# transformation composes with Torchscript with no issues. +jit_rn18 = torch.jit.script(fused_rn18) +print("jit time: ", benchmark(jit_rn18)) + + +############ +# Conclusion +# ---------- +# As we can see, using FX we can easily write static graph transformations on +# PyTorch code. +# +# Since FX is still in beta, we would be happy to hear any +# feedback you have about using it. Please feel free to use the +# PyTorch Forums (https://discuss.pytorch.org/) and the issue tracker +# (https://github.com/pytorch/pytorch/issues) to provide any feedback +# you might have. 
\ No newline at end of file From 07b6340348a1b00a568795c0ceec37b1e18bba42 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 4 Mar 2021 10:27:00 -0500 Subject: [PATCH 07/13] Update numeric_suite_tutorial.py --- prototype_source/numeric_suite_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 61b7c670fd8..fee8308eb95 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -50,7 +50,7 @@ float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, img_data, inplace=False) +qmodel = quantize(float_model, default_eval_fn, (img_data,), inplace=False) ############################################################################## # 1. Compare the weights of float and quantized models From 49362b16ae3ba883cd9edbc0adef4e52701d90e2 Mon Sep 17 00:00:00 2001 From: Pritam Damania <9958665+pritamdamania87@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:03:26 -0800 Subject: [PATCH 08/13] Tutorial combining DDP with Pipeline Parallelism to Train Transformer models (#1347) * Tutorial combining DDP with Pipeline Parallelism to Train Transformer models. Summary: Tutorial which places a pipe on GPUs 0 and 1 and another Pipe on GPUs 2 and 3. Both pipe replicas are replicated via DDP. One process drives GPUs 0 and 1 and another drives GPUs 2 and 3. * Polish out some of the docs. * Add thumbnail and address some comments. Co-authored-by: pritam --- ...Data-Parallel-and-Pipeline-Parallelism.png | Bin 0 -> 35776 bytes advanced_source/ddp_pipeline_tutorial.py | 464 ++++++++++++++++++ index.rst | 8 + 3 files changed, 472 insertions(+) create mode 100644 _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png create mode 100644 advanced_source/ddp_pipeline_tutorial.py diff --git a/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png b/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png new file mode 100644 index 0000000000000000000000000000000000000000..426a14d98f5f7fbbf695626658ad1946fe4ef63e GIT binary patch literal 35776 zcmeEtbcnTI)&B)>Oj7rou)-Lc&u~R(Owu^vdwR2NV4z(ry2B4GD=JNk!qU zZeYP_7sh9r#ZbtDAAG_UiA(Gz6Q1<_OD+;+>G=~*zEZu#3PU>WIv-4D-){6C*cXst=D#k!;j7i3u!AhP{V)= zWaN#4(zMfeQ^1d2G$Vv^m<%z5v0Ep)*e`E6k|fmZ|3V6V%9dFFH<0|l!2j~`KREpF zH2hZ`{#OkCR~-M>8~#6NjSFI|Ezj?_6M|`8ninipY=Ma{ovmwGs~5zaOvE5pOI}KY z@~s^sP$mSts7EXpABu_1I(=nKr%j>=3!mO7er#x*j;~kza3eNESMo9D=Pm>Oo9hvN z=b|(onfMub<`}{U4l4@lll|(YrXXVTpc~Rbt7Lh6Nz_J%>31YUEPozl@qb>VqArNV zto$Ym#N0-rm3ad3mwk*O^wW)gy~0!%Lug$b8Rc}fKWX^<_)!Yp@5A!jJOrUAbu}ST z6?&{PkuOQ`e4_S@_`qrYEU>?EI-Wss;r#w}m+PqO7CJhASat`x1 z@Tbayc-Vtw$~TUy_n|w%WP_35E>ch?=L}oGg~t))L5s zs22%Q&i&9DY5FyXE+;RA)}ckhR5#4PA5C74NgJ>c-Qw|xWOxUTUlm1m^MDDydC3)B zre-9-()+;H*U!)D_}{O?(^K^?d15*bGEww(VCEVYd~c#u249o~ZFzC*%{b6|7+1Fg5wk)-R6mMA6@wo}(QHWjf#f7j2Y zg466!?-2RW_e13Y+`}FWSi!-i*8cvSzp*CQn%Gzk;mu? 
[remainder of the base85-encoded binary data for _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png omitted]
zd1+X+9A@lpG!t=d3zF8|k)Cx248By4|3guEpq*>$&vLDUS-xAGcahYR2_=lrdIs+C z$(Z6odz%)+?0%JPsgR4e5smc8A1b@4cB{qqQ0%&Yl3%2f_n8UHJn2y`o<+}CJury^ znO!(eZCT!GpEKj?=eQQeW;mj?sgm)hHw+z3xZKaOa z6)8Zrq8a<0B+BRit~9X$t=IZgo4xUs4CHxht=gBm#!g#w?G3_VCjzGO6Jc9f6=l-}>OJ3srEVS|YU2q_WI_Fs=9j`!;Jp6$FbslMtWcC@HNYYoG@i{?!sDok6;vmi1(`U(x|$!Yg570P zL*h~JKQOzP8W(gvN#Giz+D0LsdlH{CS32YX-6&=WhuCZ|`wMz*CwD9=@B)RejDZY# zEV6CyOWWT(x%}xLP^Tzkq(&bMavc(7y?`#92^_yQ5w(x`8k)P+PKJ_Wj~2C=rS*VG zm_k!S`P0U$d-}<@292s;Tc5dWWQk!YtD1;}1_CpwImv9ZGh&;~Zm|!=KMQNf;w9iC zc}EmUT*)tg2XB^+4Hs+IyY*UDqDDuwI@>cY!mY!a?!85^KEAg7vTiJ=v1~4EeL!Ev z#|b_DI;n?RbWKR_VomEky*V(@tZzalc7ARN%&p3}s%NeS&CQeA(r3;CuM~rQ`tmvB zQx6JP9pQ7%^{q+zd^QoSbXo#ouyHFPxst;zz5LXYNU!ZXW+2KEit3R95j!4>{QzVj zPAPA|;QfSM!`LoxY8zbhF?4k1wM`AJf6FfvM?4MvGXrtSGEHjML!rw9bua^lRNDV8 zM*6G}@%ZGqB{km)oa%=tH@sZm137u!Mc|cA9pebPY-;I*f01g(z_hBJ#hMCmJnvV| zF)`NB83e> z8nm40GlZT0SlHA5A!x6WGJa3hli31-*FTO+LbIH@se8~BEYMWRa9~U9@hc8EDFJ(1dO@XMADM>yB?ZQ32fCix*@aReQ|nD} zis6{}Q^T1^qi`AT9CzHQBVZ4JY`#>q5t1pM24Ytgi}VuJ^=CJd;6hc3!EoEt4&)cE zs5*V!PkjZf2(OxylK7Ft0)Q&NixH>aB;bTfK|r*0#$4=aV0mdeFE;*6v*kku>by!~ z-mB?Mi#M4EHFry~%vi@gGn#32ij~-M$(7{UF>s-I&9VGWF%Ig*e*kCotc3sj1Su~C zc3M2%d2>onaVcj-Ip!H0FY{5ey<=VLKz{x`pUbiA9f#ho&P&|a4>QJX?FDod=)#_# zhnkZum_wJ@vooOh)|hBsVSN$(9}{SD=!+J|p-b3U0YPbOb=Ni~4ge;=JLH1_FKU7j zl;l)h1W};!5mU=)TEot`+Odg|@0;>qN6-)dX&?^~ll60@pW`8C2kv{+r;GXBi_DUe zh6@7Pj3u42Vk_K8za0Ux7$38*_1;^A`34ku_LK*>;D^Fx);vw<{RKF*vZRq3BjPtx zi@i`0g5vDGXi1SoD$*Kfy=YO+Xd39MaQZ>~+{;Nz0}-fpOuP{O5#;eEtjySpe}Exv z^OLF>tz!{AGKpV8qp!5yQP8ZonGI8L#$JLvU1A>}PL5wQSES%QQ!}FaUb4z>syW0$|{%N)-3i2_vUM#7Mkat|NP;lPiYblW+}Ju-Q+yJ zk`9{f>PF!VKf*L<8jQ26>eoKK`PiV5@IAD6D|C!A{+|L1aC=pvq8z&JPXs=H;oc^9*;lz(2B z6k4>HgAx3o*i~q)*F45tbd{`h(uDQdQhtPWBdNiSe%6Qc$IS*LljN7hGVclTDdV;S zu^kpKKQduZ*23(y0f;tz>3h6=_D59RVvd1833~8GR!&#%`rfWY&4UU#-h#dC1FYge zQkrsjzJ@+*2T+{l6J4c`;t5=HP}G_)cfvx(LFL@Lzfz&BUD`2ocY|Dvn3JfL2T<|E z`m!fLdDoJ%Y4cAM0yci(_wHZBO@8!on0mYQW)~B_;Ff8uHj+TkuRAV^u_?b+d*_wD zascfPzl=Kmlilt^=$`wZx1+2Ne$0sp4G~!VdtNO}k86H&TOULoQ2jzs2V>h8#y0F@ z&I(N**gPA|Pgz0y>RH{-{MYGx+$LxW+`}656-oUiw);~tBb#-UzpRWqLlW-Zc-rE! 
zyz{miLy1wkW>9Vp-3`1V8rOm|YQPIy<}G4$8{6))&I)=pK|gK?O(P$WCoveN8Npe=T4cg=-I>^njq%CtQHx|m!N#Tx(YDVYy4ij0;Uy(KWDuz)iYr7=#18cAmT7RU@>Fb_6|_ z$FKNxW(eXgG3nDP#R~yzBu24&$#v&hTpw4~q&y8Ecl~PF$JyT|r>3#F3sMdcg2*-k zyddTITZvGBWqmrbLo>kN>|cG+Or?iaA&e>i09v#DD9d}8*s6mfFG*JpXeR7jXATI|cV zLCEa#B1LqTYArH1U~AZ1gz(YAZlZ(mTgb5D-FmuTv|!^uxnsj2nbS|To3*;lA3Z_0 zlzqUYtyEGZeVK%WWWV;vl8hd6ieD!%XN~UczOyFE!t5FTTtE3zuxA%J7#-TSKgh0GIcY zo+@eLyl9v<6ly}Zi}~B=6(i}>V}#qvEHVn02+%Q{( zNf{M8m;eunWkcb|ufvl{2xTWDS4$%vJ?jz9rvA<`kf@ANXWr$QtCbk|PNNNu$9U(% zW}2vmY3qS*il67MYm5B;iL&6ewNp!$1ky=qkt$Xh@gp>`z^Jk-YRMA-WL*_pK{0iO zBE^R08$8m`WWJcdQn!IKFQN6wZ3`p9;8a;M(yHK~yt zgVoOSfPS6=`{B8_V2`wfUmkB)#gyb(U-&WSWQVr>-HTxI*GP*Rp1uIJT=uW)l_WJh zcwz!@c;-}vlssFHW_rM{{0aMbhI?Sx3}MP9vaO9z$U5(gvY%1apfVv4jVtgb)HAN1 zW7L#?fy78Ygh#fS{~DG}TH*h!gZo+8iC3FF8f?C7M0fR-?+irf2X{RHH#{<@N6m2?B<2;*^zjPLUrVHW?3mw$nDQ+#z+aksJ&)** z<4a2$5m2Ajleha-f~tB@T=!9c{b4OdG{(tp;<;WK(D|JP+;shv1Ihz;NHWIVdBjwN zfv5UuGXr|W*{Xc^NE*veaQxekJM|B#qgM1 zY8X%*llu7aIM=|+YkMdkThv-JeDHI7={MyhF8A`dvmQJNK4A)VT&#!eUS8vKTMw6q zoS(z&%st~|elNc@lJupKQR?WZ1=N;i4u>Qn?Pjd40&B&=gd4+-k4j=9LYbK~(nS!r zn(1qtCQE?jlesg2Gyjs6aRgrW5O$UV|LNxMIZvwGWOLtZFKI&|Eal?j%8&AIeGTt- z7V`gVu|M30EJPP-WTFhqsV}=2w^S~Aa zz4=B*z;SxEJ9{`sYDRocqo*IFykJ=txF1bhHmB6kq4}X_t2o(8GTG{Iz4=lawZ48z zk|esCkjM{&GM}~ETy?3e3}t$rSEHlPcYZFhwtOwMV1xeLid&*;g9t=|9O4wZK%j4E z*jUW4al!l;KANCc3cH#-ZZrRL?*u-!7$te8ANjqC*J+R_9DMKSMcXiv&{$bi@u(ja zYyjPsN)%_BQXZw;?%za~Z?J3=d;DSd?vK9E@!-0)6B9)DVe%d@z-47?p=60Ltb5bf z7>C+%ZYkfBk!Ca{H(a=~O;CM$NW`;6DfvBP(_nX1Hr9K;gsugnsz;=@mEND{w|dIK zHd&a8DmJP4KC|`N_xMU_5Mp;|v^|7d|1!fnn(SHk{4O8GU_Ld*vPEC*sP=at#y$qJ zc3#-m+C3h+j*?})wdm+>niN#aglfruv+3;Z@YQ5!Ekf@iwt0I%w;VL%vtB>TO7_n1 zI{!axcN2TG>EANvcZkg-4Xpk*EUVQj?^E=~lo#7gvTyRO64C|XtjO_ z^qU5a@dYx)E@Lw}jDoD4-SV1p{aE-IN~UEVezU^&At-mFWjK}HxQ&#RJ`33%3%Eh{ zZ|}p(J?2nTM0i#0uu58+@nPr9fW073+}vjJg&pILs()?_y^?MC}Z^ zKjLfQJXh!1>nU3XkS|#U($11v?+!~=V)hxy?$ci*jcZivi2}?-eKJCwWlzlHM={|^ zorCC^B)!~WUw`k8I}@j8Zq!ZP7Y1~TcijkL4-KMZhMwQ}=UUoX^RtLfXr0Jbx8NLO zZBlCJcf-PNSg(|fjLP7>f~;OEc(K2xhh)A@q8BnZ^>>+>$$0KXb}NE3KwhzjZgcAe z3Aa79g* zc0nU^f7MlOkt|bFRXf2m3V2%0Z)xT~)+Mn$DDwUCyh@md>?t!j{mb_&tAWK%Vv_VN zUq)lhXZqWA=ZR*dettvv`hzR5h23Jmu}EnHDbXV3@VWY9ZBGg!qtb@!I?Yvr(h zHn5e~SQ(OhWjfN#3fX2_nEXlu*vP*Dbwl*?yxX3gN1+N<2YzkGC;hs6D$VEaweNV8 zMn=Yb{ji&??FcFh@nmue-IXEDzIZR)=wlE~!v+Q8#llGigM z-(5pHo}(-CD>Ru{FBW1Gr6^=pU&WI9e85S}*GAOPRiTwUh?H1q<=8vhF5f1x?%n@x z%!100Ms^uR!)0X@H?l~`G+^MFU0|^&Xv%&z(E(c|TQXF&3omMW>$bMhuw6FcP>?l$ zX}SZXQ34|}ev5u1yU%=$>am@V(a}n~Zi;hhQ#%P$k;h=}EgJ(kD-<-U*I5@ZS-UeK z@N`JxWAav%l|)5BfikJdZE>a_;Vp#3teo-!2KLawiA8O|8L7!jn06V7=npgq{I}H> zmeY^n{zdgMn~aJq`a0<@M*Ay!Bn8IgOk-(INUVLwg=UB#$Hl+JoA_y6txcB0HBu`! 
zO;omGg!_0O*CvVN=C*LyJtYn!)Wdz(QtV7yN}h<=*2VEzJl+-e2sj)P_ZU-T*d(!c z$LD<9&@G>IJpE(hRkSnr_9$8(ylW-z);4$e{*Q#NAWg#dc*^V)5t5d|No7XrE*mb^ zQe{NrA)T9>Oe;Xw?<+uTtn`IC%4@Uzb4p7;JzU>@#CG+qq~##1MDuM!R?p5;GUf-@ z`7N1G$2g4wM~vgZ=7CrbgFgoxN!bi8bW zoo8VV4Wz^m{x3~nuZMH(JKG!^xP*`o*<+e?=U~@QIZ6(|8kOE{K`Fpx=khEm(UV55 zXG6tIWh|Cz!-gE=y0Qaan!rsEaEFS$|Fq{#Yc}HVe61`Tcp%_LlfpI2^ZOM+t}6}f zh9=+Y*4oE10SHo$BMH)V`$%D|#{-0nO-y@FK%O05kD1g3}7$O87M@8N$83gef7!3_oOG&o_ESn{CTq!9aphiRs2 zvjvt^T;4K*@2i?E!IcRzRFHPy7}bs%DFfp&4rXUR7P8iprR*jsFirWqj@x4R5bVi5w++At_vwy;sy!D3h}c`9C68$iDyp literal 0 HcmV?d00001 diff --git a/advanced_source/ddp_pipeline_tutorial.py b/advanced_source/ddp_pipeline_tutorial.py new file mode 100644 index 00000000000..9a9fd909436 --- /dev/null +++ b/advanced_source/ddp_pipeline_tutorial.py @@ -0,0 +1,464 @@ +""" +Training Transformer models using Distributed Data Parallel and Pipeline Parallelism +==================================================================================== + +**Author**: `Pritam Damania `_ + +This tutorial demonstrates how to train a large Transformer model across +multiple GPUs using `Distributed Data Parallel `__ and +`Pipeline Parallelism `__. This tutorial is an extension of the +`Sequence-to-Sequence Modeling with nn.Transformer and TorchText `__ tutorial +and scales up the same model to demonstrate how Distributed Data Parallel and +Pipeline Parallelism can be used to train Transformer models. + +Prerequisites: + + * `Pipeline Parallelism `__ + * `Sequence-to-Sequence Modeling with nn.Transformer and TorchText `__ + * `Getting Started with Distributed Data Parallel `__ +""" + + +###################################################################### +# Define the model +# ---------------- +# + +###################################################################### +# ``PositionalEncoding`` module injects some information about the +# relative or absolute position of the tokens in the sequence. The +# positional encodings have the same dimension as the embeddings so that +# the two can be summed. Here, we use ``sine`` and ``cosine`` functions of +# different frequencies. + +import sys +import os +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import tempfile +from torch.nn import TransformerEncoder, TransformerEncoderLayer + +class PositionalEncoding(nn.Module): + + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + x = x + self.pe[:x.size(0), :] + return self.dropout(x) + + +###################################################################### +# In this tutorial, we will split a Transformer model across two GPUs and use +# pipeline parallelism to train the model. In addition to this, we use +# `Distributed Data Parallel `__ +# to train two replicas of this pipeline. We have one process driving a pipe across +# GPUs 0 and 1 and another process driving a pipe across GPUs 2 and 3. Both these +# processes then use Distributed Data Parallel to train the two replicas. 
The +# model is exactly the same model used in the `Sequence-to-Sequence Modeling with nn.Transformer and TorchText +# `__ tutorial, +# but is split into two stages. The largest number of parameters belong to the +# `nn.TransformerEncoder `__ layer. +# The `nn.TransformerEncoder `__ +# itself consists of ``nlayers`` of `nn.TransformerEncoderLayer `__. +# As a result, our focus is on ``nn.TransformerEncoder`` and we split the model +# such that half of the ``nn.TransformerEncoderLayer`` are in ``TransformerModelStage1`` +# and the other half are in ``TransformerModelStage2``. + +if sys.platform == 'win32': + print('Windows platform is not supported for pipeline parallelism') + sys.exit(0) +if torch.cuda.device_count() < 4: + print('Need at least four GPU devices for this tutorial') + sys.exit(0) + +class TransformerModelStage1(nn.Module): + + def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): + super(TransformerModelStage1, self).__init__() + self.src_mask = None + self.pos_encoder = PositionalEncoding(ninp, dropout) + encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) + self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) + self.encoder = nn.Embedding(ntoken, ninp) + self.ninp = ninp + + self.init_weights() + + def _generate_square_subsequent_mask(self, sz): + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def init_weights(self): + initrange = 0.1 + self.encoder.weight.data.uniform_(-initrange, initrange) + + def forward(self, src): + if self.src_mask is None or self.src_mask.size(0) != src.size(0): + device = src.device + mask = self._generate_square_subsequent_mask(src.size(0)).to(device) + self.src_mask = mask + + src = self.encoder(src) * math.sqrt(self.ninp) + src = self.pos_encoder(src) + output = self.transformer_encoder(src, self.src_mask) + return output + +class TransformerModelStage2(nn.Module): + + def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): + super(TransformerModelStage2, self).__init__() + self.src_mask = None + encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) + self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) + self.decoder = nn.Linear(ninp, ntoken) + + self.init_weights() + + def _generate_square_subsequent_mask(self, sz): + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def init_weights(self): + initrange = 0.1 + self.decoder.bias.data.zero_() + self.decoder.weight.data.uniform_(-initrange, initrange) + + def forward(self, src): + if self.src_mask is None or self.src_mask.size(0) != src.size(0): + device = src.device + mask = self._generate_square_subsequent_mask(src.size(0)).to(device) + self.src_mask = mask + + output = self.transformer_encoder(src, self.src_mask) + output = self.decoder(output) + return output + +###################################################################### +# Start multiple processes for training +# ------------------------------------- +# + + +###################################################################### +# We start two processes where each process drives its own pipeline across two +# GPUs. ``run_worker`` is executed for each process. 
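+
+######################################################################
+# For example, with ``world_size = 2`` (see the ``mp.spawn`` call at the end of
+# this file), rank 0 builds its pipe on GPUs ``2 * rank`` and ``2 * rank + 1``,
+# i.e. GPUs 0 and 1, while rank 1 uses GPUs 2 and 3; DistributedDataParallel
+# then keeps the two replicas in sync by averaging their gradients.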
+ +def run_worker(rank, world_size): + + +###################################################################### +# Load and batch data +# ------------------- +# + + +###################################################################### +# The training process uses Wikitext-2 dataset from ``torchtext``. The +# vocab object is built based on the train dataset and is used to numericalize +# tokens into tensors. Starting from sequential data, the ``batchify()`` +# function arranges the dataset into columns, trimming off any tokens remaining +# after the data has been divided into batches of size ``batch_size``. +# For instance, with the alphabet as the sequence (total length of 26) +# and a batch size of 4, we would divide the alphabet into 4 sequences of +# length 6: +# +# .. math:: +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} +# +# These columns are treated as independent by the model, which means that +# the dependence of ``G`` and ``F`` can not be learned, but allows more +# efficient batch processing. +# + +# In 'run_worker' + def print_with_rank(msg): + print('[RANK {}]: {}'.format(rank, msg)) + + import io + from torchtext.utils import download_from_url, extract_archive + from torchtext.data.utils import get_tokenizer + from torchtext.vocab import build_vocab_from_iterator + + url = 'https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip' + test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url, root=".data{}".format(rank))) + tokenizer = get_tokenizer('basic_english') + vocab = build_vocab_from_iterator(map(tokenizer, + iter(io.open(train_filepath, + encoding="utf8")))) + + def data_process(raw_text_iter): + data = [torch.tensor([vocab[token] for token in tokenizer(item)], + dtype=torch.long) for item in raw_text_iter] + return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) + + train_data = data_process(iter(io.open(train_filepath, encoding="utf8"))) + val_data = data_process(iter(io.open(valid_filepath, encoding="utf8"))) + test_data = data_process(iter(io.open(test_filepath, encoding="utf8"))) + device = torch.device(2 * rank) + + def batchify(data, bsz, rank, world_size, is_train=False): + # Divide the dataset into bsz parts. + nbatch = data.size(0) // bsz + # Trim off any extra elements that wouldn't cleanly fit (remainders). + data = data.narrow(0, 0, nbatch * bsz) + # Evenly divide the data across the bsz batches. + data = data.view(bsz, -1).t().contiguous() + # Divide the data across the ranks only for training data. 
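+        # For example, if the batched training data has 600 rows and
+        # world_size is 2, rank 0 keeps rows 0-299 and rank 1 keeps rows
+        # 300-599, so each DDP replica trains on its own shard of the data.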
+ if is_train: + data_per_rank = data.size(0) // world_size + data = data[rank * data_per_rank : (rank + 1) * data_per_rank] + return data.to(device) + + batch_size = 20 + eval_batch_size = 10 + train_data = batchify(train_data, batch_size, rank, world_size, True) + val_data = batchify(val_data, eval_batch_size, rank, world_size) + test_data = batchify(test_data, eval_batch_size, rank, world_size) + + +###################################################################### +# Functions to generate input and target sequence +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# + + +###################################################################### +# ``get_batch()`` function generates the input and target sequence for +# the transformer model. It subdivides the source data into chunks of +# length ``bptt``. For the language modeling task, the model needs the +# following words as ``Target``. For example, with a ``bptt`` value of 2, +# we’d get the following two Variables for ``i`` = 0: +# +# .. image:: ../_static/img/transformer_input_target.png +# +# It should be noted that the chunks are along dimension 0, consistent +# with the ``S`` dimension in the Transformer model. The batch dimension +# ``N`` is along dimension 1. +# + +# In 'run_worker' + bptt = 35 + def get_batch(source, i): + seq_len = min(bptt, len(source) - 1 - i) + data = source[i:i+seq_len] + target = source[i+1:i+1+seq_len].view(-1) + return data, target + +###################################################################### +# Model scale and Pipe initialization +# ----------------------------------- +# + + +###################################################################### +# To demonstrate training large Transformer models using pipeline parallelism, +# we scale up the Transformer layers appropriately. We use an embedding +# dimension of 4096, hidden size of 4096, 16 attention heads and 8 total +# transformer layers (``nn.TransformerEncoderLayer``). This creates a model with +# **~1 billion** parameters. +# +# We need to initialize the `RPC Framework `__ +# since Pipe depends on the RPC framework via `RRef `__ +# which allows for future expansion to cross host pipelining. We need to +# initialize the RPC framework with only a single worker since we're using a +# single process to drive multiple GPUs. +# +# The pipeline is then initialized with 8 transformer layers on one GPU and 8 +# transformer layers on the other GPU. One pipe is setup across GPUs 0 and 1 and +# another across GPUs 2 and 3. Both pipes are then replicated using DistributedDataParallel. + +# In 'run_worker' + ntokens = len(vocab.stoi) # the size of vocabulary + emsize = 4096 # embedding dimension + nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder + nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder + nhead = 16 # the number of heads in the multiheadattention models + dropout = 0.2 # the dropout value + + from torch.distributed import rpc + tmpfile = tempfile.NamedTemporaryFile() + rpc.init_rpc( + name="worker", + rank=0, + world_size=1, + rpc_backend_options=rpc.TensorPipeRpcBackendOptions( + init_method="file://{}".format(tmpfile.name), + ) + ) + + # Need to use 'checkpoint=never' since as of PyTorch 1.8, Pipe checkpointing + # doesn't work with DDP. 
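+    # The two stages below are placed on GPUs 2 * rank and 2 * rank + 1, and
+    # ``chunks=8`` splits each mini-batch into 8 micro-batches that are
+    # pipelined through the two stages so that both GPUs stay busy.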
+ from torch.distributed.pipeline.sync import Pipe + model = Pipe( + torch.nn.Sequential( + TransformerModelStage1(ntokens, emsize, nhead, nhid, int(nlayers/2), dropout).cuda(2 * rank), + TransformerModelStage2(ntokens, emsize, nhead, nhid, int(nlayers/2), dropout).cuda(2 * rank + 1), + ), + chunks = 8, + checkpoint = "never" + ) + + # Initialize process group and wrap model in DDP. + from torch.nn.parallel import DistributedDataParallel + import torch.distributed as dist + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '29500' + dist.init_process_group( + backend="nccl", rank=rank, world_size=world_size) + model = DistributedDataParallel(model) + + def get_total_params(module: torch.nn.Module): + total_params = 0 + for param in module.parameters(): + total_params += param.numel() + return total_params + + print_with_rank('Total parameters in model: {:,}'.format(get_total_params(model))) + +###################################################################### +# Run the model +# ------------- +# + + +###################################################################### +# `CrossEntropyLoss `__ +# is applied to track the loss and +# `SGD `__ +# implements stochastic gradient descent method as the optimizer. The initial +# learning rate is set to 5.0. `StepLR `__ is +# applied to adjust the learn rate through epochs. During the +# training, we use +# `nn.utils.clip_grad_norm\_ `__ +# function to scale all the gradient together to prevent exploding. +# + +# In 'run_worker' + criterion = nn.CrossEntropyLoss() + lr = 5.0 # learning rate + optimizer = torch.optim.SGD(model.parameters(), lr=lr) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95) + + import time + def train(): + model.train() # Turn on the train mode + total_loss = 0. + start_time = time.time() + ntokens = len(vocab.stoi) + + # Train only for 50 batches to keep script execution time low. + nbatches = min(50 * bptt, train_data.size(0) - 1) + + for batch, i in enumerate(range(0, nbatches, bptt)): + data, targets = get_batch(train_data, i) + optimizer.zero_grad() + # Since the Pipe is only within a single host and process the ``RRef`` + # returned by forward method is local to this node and can simply + # retrieved via ``RRef.local_value()``. + output = model(data).local_value() + # Need to move targets to the device where the output of the + # pipeline resides. + loss = criterion(output.view(-1, ntokens), targets.cuda(2 * rank + 1)) + loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) + optimizer.step() + + total_loss += loss.item() + log_interval = 10 + if batch % log_interval == 0 and batch > 0: + cur_loss = total_loss / log_interval + elapsed = time.time() - start_time + print_with_rank('| epoch {:3d} | {:5d}/{:5d} batches | ' + 'lr {:02.2f} | ms/batch {:5.2f} | ' + 'loss {:5.2f} | ppl {:8.2f}'.format( + epoch, batch, nbatches // bptt, scheduler.get_lr()[0], + elapsed * 1000 / log_interval, + cur_loss, math.exp(cur_loss))) + total_loss = 0 + start_time = time.time() + + def evaluate(eval_model, data_source): + eval_model.eval() # Turn on the evaluation mode + total_loss = 0. + ntokens = len(vocab.stoi) + # Evaluate only for 50 batches to keep script execution time low. 
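+        # As in train(), the pipeline returns an ``RRef``, so the output is
+        # fetched with ``.local_value()`` and the targets are moved to the GPU
+        # of the last stage (``2 * rank + 1``) before computing the loss.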
+ nbatches = min(50 * bptt, data_source.size(0) - 1) + with torch.no_grad(): + for i in range(0, nbatches, bptt): + data, targets = get_batch(data_source, i) + output = eval_model(data).local_value() + output_flat = output.view(-1, ntokens) + # Need to move targets to the device where the output of the + # pipeline resides. + total_loss += len(data) * criterion(output_flat, targets.cuda(2 * rank + 1)).item() + return total_loss / (len(data_source) - 1) + +###################################################################### +# Loop over epochs. Save the model if the validation loss is the best +# we've seen so far. Adjust the learning rate after each epoch. + +# In 'run_worker' + best_val_loss = float("inf") + epochs = 3 # The number of epochs + best_model = None + + for epoch in range(1, epochs + 1): + epoch_start_time = time.time() + train() + val_loss = evaluate(model, val_data) + print_with_rank('-' * 89) + print_with_rank('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' + 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), + val_loss, math.exp(val_loss))) + print_with_rank('-' * 89) + + if val_loss < best_val_loss: + best_val_loss = val_loss + best_model = model + + scheduler.step() + + +###################################################################### +# Evaluate the model with the test dataset +# ------------------------------------- +# +# Apply the best model to check the result with the test dataset. + +# In 'run_worker' + test_loss = evaluate(best_model, test_data) + print_with_rank('=' * 89) + print_with_rank('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( + test_loss, math.exp(test_loss))) + print_with_rank('=' * 89) + +# Main execution +import torch.multiprocessing as mp + +if __name__=="__main__": + world_size = 2 + mp.spawn(run_worker, args=(world_size, ), nprocs=world_size, join=True) + diff --git a/index.rst b/index.rst index dec8bc64a77..9dbad7ad71b 100644 --- a/index.rst +++ b/index.rst @@ -406,6 +406,13 @@ Welcome to PyTorch Tutorials :link: advanced/rpc_ddp_tutorial.html :tags: Parallel-and-Distributed-Training +.. customcarditem:: + :header: Training Transformer models using Distributed Data Parallel and Pipeline Parallelism + :card_description: Walk through a through a simple example of how to train a transformer model using Distributed Data Parallel and Pipeline Parallelism + :image: _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png + :link: advanced/ddp_pipeline_tutorial.html + :tags: Parallel-and-Distributed-Training + .. Mobile .. customcarditem:: @@ -613,6 +620,7 @@ Additional Resources intermediate/dist_pipeline_parallel_tutorial intermediate/rpc_async_execution advanced/rpc_ddp_tutorial + advanced/ddp_pipeline_tutorial .. 
toctree:: :maxdepth: 2 From f931a062645b2d6440db6aafea6f81332c6d5164 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 09:56:00 -0800 Subject: [PATCH 09/13] More updates to numeric_suite --- prototype_source/numeric_suite_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index fee8308eb95..d900b8f3593 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -124,13 +124,13 @@ def compute_error(x, y): print("\nkeys of act_compare_dict entry for conv1's output:") print(act_compare_dict['conv1.stats'].keys()) -print(act_compare_dict['conv1.stats']['float'].shape) -print(act_compare_dict['conv1.stats']['quantized'].shape) +print(act_compare_dict['conv1.stats']['float'][0].shape) +print(act_compare_dict['conv1.stats']['quantized'][0].shape) ############################################################################## # This dict can be used to compare and compute the quantization error of the activations of float and quantized models as following. for key in act_compare_dict: - print(key, compute_error(act_compare_dict[key]['float'], act_compare_dict[key]['quantized'].dequantize())) + print(key, compute_error(act_compare_dict[key]['float'][0], act_compare_dict[key]['quantized'][0].dequantize())) ############################################################################## # If we want to do the comparison for more than one input data, we can do the following. From 4877298f5255902df2f4e57b62193ddfd578df00 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 10:41:41 -0800 Subject: [PATCH 10/13] Even more updates --- prototype_source/numeric_suite_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index d900b8f3593..24a26a3dfa4 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -50,7 +50,7 @@ float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, (img_data,), inplace=False) +qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False) ############################################################################## # 1. Compare the weights of float and quantized models @@ -206,7 +206,7 @@ def forward(self, x): float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, img_data, inplace=False) +qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False) ############################################################################## # In the following example we call ``compare_model_stub()`` from PyTorch Numeric Suite to compare ``QuantizableBasicBlock`` module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. 
From 352c6a5e17fdbed7c79d48bc372579cf4bc7a1d4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 11:24:08 -0800 Subject: [PATCH 11/13] Update numeric_suite_tutorial.py Hopefully that's the last one --- prototype_source/numeric_suite_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 24a26a3dfa4..3575e44ce09 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -224,14 +224,14 @@ def forward(self, x): print("\nkeys of ob_dict entry for layer1.0's output:") print(ob_dict['layer1.0.stats'].keys()) -print(ob_dict['layer1.0.stats']['float'].shape) -print(ob_dict['layer1.0.stats']['quantized'].shape) +print(ob_dict['layer1.0.stats']['float'][0].shape) +print(ob_dict['layer1.0.stats']['quantized'][0].shape) ############################################################################## # This dict can be then used to compare and compute the module level quantization error. for key in ob_dict: - print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'].dequantize())) + print(key, compute_error(ob_dict[key]['float'][0], ob_dict[key]['quantized'][0].dequantize())) ############################################################################## # If we want to do the comparison for more than one input data, we can do the following. From c609fd517e4156e8c4c093d6253522fec82e6909 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 12:45:52 -0800 Subject: [PATCH 12/13] Update numeric_suite_tutorial.py Last one --- prototype_source/numeric_suite_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 3575e44ce09..df386f4efd2 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -370,7 +370,7 @@ def init_hidden(self, bsz): for key in act_compare_dict: - print(key, compute_error(act_compare_dict[key]['float'][0], act_compare_dict[key]['quantized'][0])) + print(key, compute_error(act_compare_dict[key]['float'][0][0], act_compare_dict[key]['quantized'][0][0])) ############################################################################## # @@ -405,7 +405,7 @@ def init_hidden(self, bsz): # This dict can be then used to compare and compute the module level quantization error. for key in ob_dict: - print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'])) + print(key, compute_error(ob_dict[key]['float'][0], ob_dict[key]['quantized'][0])) ############################################################################## # SQNR of 40 dB is high and this is a situation where we have very good numerical alignment between the floating point and quantized model. From aca434dd1f7f2ea1badb64cab77cf3650643e598 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 4 Mar 2021 15:50:16 -0500 Subject: [PATCH 13/13] Update build.sh --- .jenkins/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index b1220e89b90..90499be51f6 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,8 +25,8 @@ pip install -r $DIR/../requirements.txt #Install PyTorch Nightly for test. 
# Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link -pip uninstall -y torch torchvision torchaudio torchtext -pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip uninstall -y torch torchvision torchaudio torchtext +# pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext # For Tensorboard. Until 1.14 moves to the release channel. pip install tb-nightly