From 284d030551d03afa7a29799785472343a2d87fb1 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Wed, 3 Mar 2021 10:36:56 -0500 Subject: [PATCH 01/13] Update build.sh --- .jenkins/build.sh | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index 1e1a06f7ee9..b1220e89b90 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -22,6 +22,12 @@ pip install -r $DIR/../requirements.txt # export PATH=/opt/conda/bin:$PATH # pip install sphinx==1.8.2 pandas +#Install PyTorch Nightly for test. +# Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html +# RC Link +pip uninstall -y torch torchvision torchaudio torchtext +pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext + # For Tensorboard. Until 1.14 moves to the release channel. pip install tb-nightly From 21bd643725378e20f151fbdb4c162471b665effc Mon Sep 17 00:00:00 2001 From: moto <855818+mthrok@users.noreply.github.com> Date: Wed, 3 Mar 2021 22:41:03 -0500 Subject: [PATCH 02/13] Update audio tutorial for release pytorch 1.8 / torchaudio 0.8 (#1379) * [wip] replace audio tutorial * Update * Update * Update * fixup * Update requirements.txt * update * Update Co-authored-by: Brian Johnson --- .../audio_preprocessing_tutorial.py | 1312 ++++++++++++++--- requirements.txt | 3 + 2 files changed, 1082 insertions(+), 233 deletions(-) diff --git a/beginner_source/audio_preprocessing_tutorial.py b/beginner_source/audio_preprocessing_tutorial.py index 6b34396aef9..b80bb323536 100644 --- a/beginner_source/audio_preprocessing_tutorial.py +++ b/beginner_source/audio_preprocessing_tutorial.py @@ -1,385 +1,1231 @@ """ -Audio I/O and Pre-Processing with torchaudio -============================================ +Audio manipulation with torchaudio +================================== -PyTorch is an open source deep learning platform that provides a -seamless path from research prototyping to production deployment with -GPU support. +``torchaudio`` provides powerful audio I/O functions, preprocessing +transforms and dataset. -Significant effort in solving machine learning problems goes into data -preparation. ``torchaudio`` leverages PyTorch’s GPU support, and provides -many tools to make data loading easy and more readable. In this -tutorial, we will see how to load and preprocess data from a simple -dataset. Please visit -`Audio I/O and Pre-Processing with torchaudio `__ to learn more. - -For this tutorial, please make sure the ``matplotlib`` package is -installed for easier visualization. +In this tutorial, we will look into how to prepare audio data and +extract features that can be fed to NN models. """ -# Uncomment the following line to run in Google Colab -# !pip install torchaudio import torch import torchaudio +import torchaudio.functional as F +import torchaudio.transforms as T + +print(torch.__version__) +print(torchaudio.__version__) + + +###################################################################### +# Preparing data and utility functions (skip this section) +# -------------------------------------------------------- +# + +#@title Prepare data and utility functions. {display-mode: "form"} +#@markdown +#@markdown You do not need to look into this cell. +#@markdown Just execute once and you are good to go. 
+#@markdown +#@markdown In this tutorial, we will use a speech data from [VOiCES dataset](https://iqtlabs.github.io/voices/), which is licensed under Creative Commos BY 4.0. + +#------------------------------------------------------------------------------- +# Preparation of data and helper functions. +#------------------------------------------------------------------------------- +import io +import os +import math +import tarfile +import multiprocessing + +import scipy +import librosa +import boto3 +from botocore import UNSIGNED +from botocore.config import Config import requests +import matplotlib import matplotlib.pyplot as plt +from IPython.display import Audio, display + +[width, height] = matplotlib.rcParams['figure.figsize'] +if width < 10: + matplotlib.rcParams['figure.figsize'] = [width * 2.5, height] + +_SAMPLE_DIR = "_sample_data" +SAMPLE_WAV_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.wav" +SAMPLE_WAV_PATH = os.path.join(_SAMPLE_DIR, "steam.wav") + +SAMPLE_WAV_SPEECH_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" +SAMPLE_WAV_SPEECH_PATH = os.path.join(_SAMPLE_DIR, "speech.wav") + +SAMPLE_RIR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/room-response/rm1/impulse/Lab41-SRI-VOiCES-rm1-impulse-mc01-stu-clo.wav" +SAMPLE_RIR_PATH = os.path.join(_SAMPLE_DIR, "rir.wav") + +SAMPLE_NOISE_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit/distant-16k/distractors/rm1/babb/Lab41-SRI-VOiCES-rm1-babb-mc01-stu-clo.wav" +SAMPLE_NOISE_PATH = os.path.join(_SAMPLE_DIR, "bg.wav") + +SAMPLE_MP3_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.mp3" +SAMPLE_MP3_PATH = os.path.join(_SAMPLE_DIR, "steam.mp3") + +SAMPLE_GSM_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/steam-train-whistle-daniel_simon.gsm" +SAMPLE_GSM_PATH = os.path.join(_SAMPLE_DIR, "steam.gsm") + +SAMPLE_TAR_URL = "https://pytorch-tutorial-assets.s3.amazonaws.com/VOiCES_devkit.tar.gz" +SAMPLE_TAR_PATH = os.path.join(_SAMPLE_DIR, "sample.tar.gz") +SAMPLE_TAR_ITEM = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" + +S3_BUCKET = "pytorch-tutorial-assets" +S3_KEY = "VOiCES_devkit/source-16k/train/sp0307/Lab41-SRI-VOiCES-src-sp0307-ch127535-sg0042.wav" + +YESNO_DATASET_PATH = os.path.join(_SAMPLE_DIR, "yes_no") +os.makedirs(YESNO_DATASET_PATH, exist_ok=True) +os.makedirs(_SAMPLE_DIR, exist_ok=True) + +def _fetch_data(): + uri = [ + (SAMPLE_WAV_URL, SAMPLE_WAV_PATH), + (SAMPLE_WAV_SPEECH_URL, SAMPLE_WAV_SPEECH_PATH), + (SAMPLE_RIR_URL, SAMPLE_RIR_PATH), + (SAMPLE_NOISE_URL, SAMPLE_NOISE_PATH), + (SAMPLE_MP3_URL, SAMPLE_MP3_PATH), + (SAMPLE_GSM_URL, SAMPLE_GSM_PATH), + (SAMPLE_TAR_URL, SAMPLE_TAR_PATH), + ] + for url, path in uri: + with open(path, 'wb') as file_: + file_.write(requests.get(url).content) + +_fetch_data() + +def _download_yesno(): + if os.path.exists(os.path.join(YESNO_DATASET_PATH, "waves_yesno.tar.gz")): + return + torchaudio.datasets.YESNO(root=YESNO_DATASET_PATH, download=True) + +YESNO_DOWNLOAD_PROCESS = multiprocessing.Process(target=_download_yesno) +YESNO_DOWNLOAD_PROCESS.start() + +def _get_sample(path, resample=None): + effects = [ + ["remix", "1"] + ] + if resample: + effects.append(["rate", f'{resample}']) + return torchaudio.sox_effects.apply_effects_file(path, effects=effects) + +def get_speech_sample(*, resample=None): + return 
_get_sample(SAMPLE_WAV_SPEECH_PATH, resample=resample) + +def get_sample(*, resample=None): + return _get_sample(SAMPLE_WAV_PATH, resample=resample) + +def get_rir_sample(*, resample=None, processed=False): + rir_raw, sample_rate = _get_sample(SAMPLE_RIR_PATH, resample=resample) + if not processed: + return rir_raw, sample_rate + rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] + rir = rir / torch.norm(rir, p=2) + rir = torch.flip(rir, [1]) + return rir, sample_rate + +def get_noise_sample(*, resample=None): + return _get_sample(SAMPLE_NOISE_PATH, resample=resample) + +def print_metadata(metadata, src=None): + if src: + print("-" * 10) + print("Source:", src) + print("-" * 10) + print(" - sample_rate:", metadata.sample_rate) + print(" - num_channels:", metadata.num_channels) + print(" - num_frames:", metadata.num_frames) + print(" - bits_per_sample:", metadata.bits_per_sample) + print(" - encoding:", metadata.encoding) + print() + +def print_stats(waveform, sample_rate=None, src=None): + if src: + print("-" * 10) + print("Source:", src) + print("-" * 10) + if sample_rate: + print("Sample Rate:", sample_rate) + print("Shape:", tuple(waveform.shape)) + print("Dtype:", waveform.dtype) + print(f" - Max: {waveform.max().item():6.3f}") + print(f" - Min: {waveform.min().item():6.3f}") + print(f" - Mean: {waveform.mean().item():6.3f}") + print(f" - Std Dev: {waveform.std().item():6.3f}") + print() + print(waveform) + print() + +def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + time_axis = torch.arange(0, num_frames) / sample_rate + + figure, axes = plt.subplots(num_channels, 1) + if num_channels == 1: + axes = [axes] + for c in range(num_channels): + axes[c].plot(time_axis, waveform[c], linewidth=1) + axes[c].grid(True) + if num_channels > 1: + axes[c].set_ylabel(f'Channel {c+1}') + if xlim: + axes[c].set_xlim(xlim) + if ylim: + axes[c].set_ylim(ylim) + figure.suptitle(title) + plt.show(block=False) + +def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + time_axis = torch.arange(0, num_frames) / sample_rate + + figure, axes = plt.subplots(num_channels, 1) + if num_channels == 1: + axes = [axes] + for c in range(num_channels): + axes[c].specgram(waveform[c], Fs=sample_rate) + if num_channels > 1: + axes[c].set_ylabel(f'Channel {c+1}') + if xlim: + axes[c].set_xlim(xlim) + figure.suptitle(title) + plt.show(block=False) + +def play_audio(waveform, sample_rate): + waveform = waveform.numpy() + + num_channels, num_frames = waveform.shape + if num_channels == 1: + display(Audio(waveform[0], rate=sample_rate)) + elif num_channels == 2: + display(Audio((waveform[0], waveform[1]), rate=sample_rate)) + else: + raise ValueError("Waveform with more than 2 channels are not supported.") + +def inspect_file(path): + print("-" * 10) + print("Source:", path) + print("-" * 10) + print(f" - File size: {os.path.getsize(path)} bytes") + print_metadata(torchaudio.info(path)) + +def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None): + fig, axs = plt.subplots(1, 1) + axs.set_title(title or 'Spectrogram (db)') + axs.set_ylabel(ylabel) + axs.set_xlabel('frame') + im = axs.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect) + if xmax: + axs.set_xlim((0, xmax)) + fig.colorbar(im, ax=axs) + plt.show(block=False) + +def plot_mel_fbank(fbank, title=None): + fig, axs = 
plt.subplots(1, 1) + axs.set_title(title or 'Filter bank') + axs.imshow(fbank, aspect='auto') + axs.set_ylabel('frequency bin') + axs.set_xlabel('mel bin') + plt.show(block=False) + +def get_spectrogram( + n_fft = 400, + win_len = None, + hop_len = None, + power = 2.0, +): + waveform, _ = get_speech_sample() + spectrogram = T.Spectrogram( + n_fft=n_fft, + win_length=win_len, + hop_length=hop_len, + center=True, + pad_mode="reflect", + power=power, + ) + return spectrogram(waveform) + +def plot_pitch(waveform, sample_rate, pitch): + figure, axis = plt.subplots(1, 1) + axis.set_title("Pitch Feature") + axis.grid(True) + + end_time = waveform.shape[1] / sample_rate + time_axis = torch.linspace(0, end_time, waveform.shape[1]) + axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3) + + axis2 = axis.twinx() + time_axis = torch.linspace(0, end_time, pitch.shape[1]) + ln2 = axis2.plot( + time_axis, pitch[0], linewidth=2, label='Pitch', color='green') + + axis2.legend(loc=0) + plt.show(block=False) + +def plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc): + figure, axis = plt.subplots(1, 1) + axis.set_title("Kaldi Pitch Feature") + axis.grid(True) + + end_time = waveform.shape[1] / sample_rate + time_axis = torch.linspace(0, end_time, waveform.shape[1]) + axis.plot(time_axis, waveform[0], linewidth=1, color='gray', alpha=0.3) + + time_axis = torch.linspace(0, end_time, pitch.shape[1]) + ln1 = axis.plot(time_axis, pitch[0], linewidth=2, label='Pitch', color='green') + axis.set_ylim((-1.3, 1.3)) + + axis2 = axis.twinx() + time_axis = torch.linspace(0, end_time, nfcc.shape[1]) + ln2 = axis2.plot( + time_axis, nfcc[0], linewidth=2, label='NFCC', color='blue', linestyle='--') + + lns = ln1 + ln2 + labels = [l.get_label() for l in lns] + axis.legend(lns, labels, loc=0) + plt.show(block=False) + ###################################################################### -# Opening a file -# ----------------- +# Audio I/O +# ========= # -# ``torchaudio`` also supports loading sound files in the wav and mp3 format. We -# call waveform the resulting raw audio signal. +# torchaudio integrates ``libsox`` and provides a rich set of audio I/O. # -url = "https://pytorch.org/tutorials/_static/img/steam-train-whistle-daniel_simon-converted-from-mp3.wav" -r = requests.get(url) -with open('steam-train-whistle-daniel_simon-converted-from-mp3.wav', 'wb') as f: - f.write(r.content) +###################################################################### +# Quering audio metadata +# ---------------------- +# +# ``torchaudio.info`` function fetches metadata of audio. You can provide +# a path-like object or file-like object. 
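For instance, here is a minimal sketch of putting the returned metadata to use by deriving the clip duration from ``num_frames`` and ``sample_rate``. The path below is illustrative rather than one of the tutorial assets:

import torchaudio

# Hypothetical path, replace with any local WAV file.
some_path = "some_file.wav"

info = torchaudio.info(some_path)
# num_frames can be 0 for some compressed or variable-bit-rate formats,
# so guard the division when working with arbitrary files.
if info.num_frames:
    duration = info.num_frames / info.sample_rate
    print(f"{some_path}: {duration:.2f} seconds, "
          f"{info.num_channels} channel(s) at {info.sample_rate} Hz")
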
+# -filename = "steam-train-whistle-daniel_simon-converted-from-mp3.wav" -waveform, sample_rate = torchaudio.load(filename) +metadata = torchaudio.info(SAMPLE_WAV_PATH) +print_metadata(metadata, src=SAMPLE_WAV_PATH) -print("Shape of waveform: {}".format(waveform.size())) -print("Sample rate of waveform: {}".format(sample_rate)) -plt.figure() -plt.plot(waveform.t().numpy()) +###################################################################### +# Where +# +# - ``sample_rate`` is the sampling rate of the audio +# - ``num_channels`` is the number of channels +# - ``num_frames`` is the number of frames per channel +# - ``bits_per_sample`` is bit depth +# - ``encoding`` is the sample coding format +# +# The values ``encoding`` can take are one of the following +# +# - ``"PCM_S"``: Signed integer linear PCM +# - ``"PCM_U"``: Unsigned integer linear PCM +# - ``"PCM_F"``: Floating point linear PCM +# - ``"FLAC"``: Flac, `Free Lossless Audio +# Codec `__ +# - ``"ULAW"``: Mu-law, +# [`wikipedia `__] +# - ``"ALAW"``: A-law +# [`wikipedia `__] +# - ``"MP3"`` : MP3, MPEG-1 Audio Layer III +# - ``"VORBIS"``: OGG Vorbis [`xiph.org `__] +# - ``"AMR_NB"``: Adaptive Multi-Rate +# [`wikipedia `__] +# - ``"AMR_WB"``: Adaptive Multi-Rate Wideband +# [`wikipedia `__] +# - ``"OPUS"``: Opus [`opus-codec.org `__] +# - ``"GSM"``: GSM-FR +# [`wikipedia `__] +# - ``"UNKNOWN"`` None of avobe +# + ###################################################################### -# When you load a file in ``torchaudio``, you can optionally specify the backend to use either -# `SoX `_ or `SoundFile `_ -# via ``torchaudio.set_audio_backend``. These backends are loaded lazily when needed. +# **Note** +# +# - ``bits_per_sample`` can be ``0`` for formats with compression and/or +# variable bit rate. (such as mp3) +# - ``num_frames`` can be ``0`` for GSM-FR format. # -# ``torchaudio`` also makes JIT compilation optional for functions, and uses ``nn.Module`` where possible. + +metadata = torchaudio.info(SAMPLE_MP3_PATH) +print_metadata(metadata, src=SAMPLE_MP3_PATH) + +metadata = torchaudio.info(SAMPLE_GSM_PATH) +print_metadata(metadata, src=SAMPLE_GSM_PATH) + ###################################################################### -# Transformations -# --------------- +# Querying file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~ # -# ``torchaudio`` supports a growing list of -# `transformations `_. +# ``info`` function works on file-like object as well. # -# - **Resample**: Resample waveform to a different sample rate. -# - **Spectrogram**: Create a spectrogram from a waveform. -# - **GriffinLim**: Compute waveform from a linear scale magnitude spectrogram using -# the Griffin-Lim transformation. -# - **ComputeDeltas**: Compute delta coefficients of a tensor, usually a spectrogram. -# - **ComplexNorm**: Compute the norm of a complex tensor. -# - **MelScale**: This turns a normal STFT into a Mel-frequency STFT, -# using a conversion matrix. -# - **AmplitudeToDB**: This turns a spectrogram from the -# power/amplitude scale to the decibel scale. -# - **MFCC**: Create the Mel-frequency cepstrum coefficients from a -# waveform. -# - **MelSpectrogram**: Create MEL Spectrograms from a waveform using the -# STFT function in PyTorch. -# - **MuLawEncoding**: Encode waveform based on mu-law companding. -# - **MuLawDecoding**: Decode mu-law encoded waveform. -# - **TimeStretch**: Stretch a spectrogram in time without modifying pitch for a given rate. -# - **FrequencyMasking**: Apply masking to a spectrogram in the frequency domain. 
-# - **TimeMasking**: Apply masking to a spectrogram in the time domain. -# -# Each transform supports batching: you can perform a transform on a single raw -# audio signal or spectrogram, or many of the same shape. + +with requests.get(SAMPLE_WAV_URL, stream=True) as response: + metadata = torchaudio.info(response.raw) +print_metadata(metadata, src=SAMPLE_WAV_URL) + + +###################################################################### +# **Note** When passing file-like object, ``info`` function does not read +# all the data, instead it only reads the beginning portion of data. +# Therefore, depending on the audio format, it cannot get the correct +# metadata, including the format itself. The following example illustrates +# this. # -# Since all transforms are ``nn.Modules`` or ``jit.ScriptModules``, they can be -# used as part of a neural network at any point. +# - Use ``format`` argument to tell what audio format it is. +# - The returned metadata has ``num_frames = 0`` # +with requests.get(SAMPLE_MP3_URL, stream=True) as response: + metadata = torchaudio.info(response.raw, format="mp3") + + print(f"Fetched {response.raw.tell()} bytes.") +print_metadata(metadata, src=SAMPLE_MP3_URL) + + +###################################################################### +# Loading audio data into Tensor +# ------------------------------ +# +# To load audio data, you can use ``torchaudio.load``. +# +# This function accepts path-like object and file-like object. +# +# The returned value is a tuple of waveform (``Tensor``) and sample rate +# (``int``). +# +# By default, the resulting tensor object has ``dtype=torch.float32`` and +# its value range is normalized within ``[-1.0, 1.0]``. +# +# For the list of supported format, please refer to `the torchaudio +# documentation `__. +# + +waveform, sample_rate = torchaudio.load(SAMPLE_WAV_SPEECH_PATH) + +print_stats(waveform, sample_rate=sample_rate) +plot_waveform(waveform, sample_rate) +plot_specgram(waveform, sample_rate) +play_audio(waveform, sample_rate) + + ###################################################################### -# To start, we can look at the log of the spectrogram on a log scale. +# Loading from file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# ``torchaudio``\ ’s I/O functions now support file-like object. This +# allows to fetch audio data and decode at the same time from the location +# other than local file system. The following examples illustrates this. +# + +# Load audio data as HTTP request +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform, sample_rate = torchaudio.load(response.raw) +plot_specgram(waveform, sample_rate, title="HTTP datasource") + +# Load audio from tar file +with tarfile.open(SAMPLE_TAR_PATH, mode='r') as tarfile_: + fileobj = tarfile_.extractfile(SAMPLE_TAR_ITEM) + waveform, sample_rate = torchaudio.load(fileobj) +plot_specgram(waveform, sample_rate, title="TAR file") + +# Load audio from S3 +client = boto3.client('s3', config=Config(signature_version=UNSIGNED)) +response = client.get_object(Bucket=S3_BUCKET, Key=S3_KEY) +waveform, sample_rate = torchaudio.load(response['Body']) +plot_specgram(waveform, sample_rate, title="From S3") + + + +###################################################################### +# Tips on slicing +# ~~~~~~~~~~~~~~~ +# +# Providing ``num_frames`` and ``frame_offset`` arguments will slice the +# resulting Tensor object while decoding. +# +# The same result can be achieved using the regular Tensor slicing, +# (i.e. 
``waveform[:, frame_offset:frame_offset+num_frames]``) however, +# providing ``num_frames`` and ``frame_offset`` arguments is more +# efficient. +# +# This is because the function will stop data acquisition and decoding +# once it finishes decoding the requested frames. This is advantageous +# when the audio data are transfered via network as the data transfer will +# stop as soon as the necessary amount of data is fetched. +# +# The following example illustrates this; # -specgram = torchaudio.transforms.Spectrogram()(waveform) +# Illustration of two different decoding methods. +# The first one will fetch all the data and decode them, while +# the second one will stop fetching data once it completes decoding. +# The resulting waveforms are identical. -print("Shape of spectrogram: {}".format(specgram.size())) +frame_offset, num_frames = 16000, 16000 # Fetch and decode the 1 - 2 seconds + +print("Fetching all the data...") +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform1, sample_rate1 = torchaudio.load(response.raw) + waveform1 = waveform1[:, frame_offset:frame_offset+num_frames] + print(f" - Fetched {response.raw.tell()} bytes") + +print("Fetching until the requested frames are available...") +with requests.get(SAMPLE_WAV_SPEECH_URL, stream=True) as response: + waveform2, sample_rate2 = torchaudio.load( + response.raw, frame_offset=frame_offset, num_frames=num_frames) + print(f" - Fetched {response.raw.tell()} bytes") + +print("Checking the resulting waveform ... ", end="") +assert (waveform1 == waveform2).all() +print("matched!") -plt.figure() -plt.imshow(specgram.log2()[0,:,:].numpy(), cmap='gray') ###################################################################### -# Or we can look at the Mel Spectrogram on a log scale. +# Saving audio to file +# -------------------- +# +# To save audio data in the formats intepretable by common applications, +# you can use ``torchaudio.save``. +# +# This function accepts path-like object and file-like object. +# +# When passing file-like object, you also need to provide ``format`` +# argument so that the function knows which format it should be using. In +# case of path-like object, the function will detemine the format based on +# the extension. If you are saving to a file without extension, you need +# to provide ``format`` argument. +# +# When saving as WAV format, the default encoding for ``float32`` Tensor +# is 32-bit floating-point PCM. You can provide ``encoding`` and +# ``bits_per_sample`` argument to change this. For example, to save data +# in 16 bit signed integer PCM, you can do the following. # +# **Note** Saving data in encodings with lower bit depth reduces the +# resulting file size but loses precision. +# + +waveform, sample_rate = get_sample() +print_stats(waveform, sample_rate=sample_rate) -specgram = torchaudio.transforms.MelSpectrogram()(waveform) +# Save without any encoding option. 
+# The function will pick up the encoding which +# the provided data fit +path = "save_example_default.wav" +torchaudio.save(path, waveform, sample_rate) +inspect_file(path) -print("Shape of spectrogram: {}".format(specgram.size())) +# Save as 16-bit signed integer Linear PCM +# The resulting file occupies half the storage but loses precision +path = "save_example_PCM_S16.wav" +torchaudio.save( + path, waveform, sample_rate, + encoding="PCM_S", bits_per_sample=16) +inspect_file(path) -plt.figure() -p = plt.imshow(specgram.log2()[0,:,:].detach().numpy(), cmap='gray') ###################################################################### -# We can resample the waveform, one channel at a time. +# ``torchaudio.save`` can also handle other formats. To name a few; # -new_sample_rate = sample_rate/10 +waveform, sample_rate = get_sample() -# Since Resample applies to a single channel, we resample first channel here -channel = 0 -transformed = torchaudio.transforms.Resample(sample_rate, new_sample_rate)(waveform[channel,:].view(1,-1)) +formats = [ + "mp3", + "flac", + "vorbis", + "sph", + "amb", + "amr-nb", + "gsm", +] -print("Shape of transformed waveform: {}".format(transformed.size())) +for format in formats: + path = f"save_example.{format}" + torchaudio.save(path, waveform, sample_rate, format=format) + inspect_file(path) -plt.figure() -plt.plot(transformed[0,:].numpy()) ###################################################################### -# As another example of transformations, we can encode the signal based on -# Mu-Law enconding. But to do so, we need the signal to be between -1 and -# 1. Since the tensor is just a regular PyTorch tensor, we can apply -# standard operators on it. +# Saving to file-like object +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ # +# Similar to the other I/O functions, you can save audio into file-like +# object. When saving to file-like object, ``format`` argument is +# required. +# + +waveform, sample_rate = get_sample() -# Let's check if the tensor is in the interval [-1,1] -print("Min of waveform: {}\nMax of waveform: {}\nMean of waveform: {}".format(waveform.min(), waveform.max(), waveform.mean())) +# Saving to Bytes buffer +buffer_ = io.BytesIO() +torchaudio.save(buffer_, waveform, sample_rate, format="wav") + +buffer_.seek(0) +print(buffer_.read(16)) ###################################################################### -# Since the waveform is already between -1 and 1, we do not need to -# normalize it. +# Data Augmentation +# ================= +# +# ``torchaudio`` provides a variety of ways to augment audio data. # -def normalize(tensor): - # Subtract the mean, and scale to the interval [-1,1] - tensor_minusmean = tensor - tensor.mean() - return tensor_minusmean/tensor_minusmean.abs().max() -# Let's normalize to the full interval [-1,1] -# waveform = normalize(waveform) +###################################################################### +# Applying effects and filtering +# ------------------------------ +# +# ``torchaudio.sox_effects`` module provides ways to apply filiters like +# ``sox`` command on Tensor objects and file-object audio sources +# directly. +# +# There are two functions for this; +# +# - ``torchaudio.sox_effects.apply_effects_tensor`` for applying effects +# on Tensor +# - ``torchaudio.sox_effects.apply_effects_file`` for applying effects on +# other audio source +# +# Both function takes effects in the form of ``List[List[str]]``. 
This +# mostly corresponds to how ``sox`` command works, but one caveat is that +# ``sox`` command adds some effects automatically, but torchaudio’s +# implementation does not do that. +# +# For the list of available effects, please refer to `the sox +# documentation `__. +# +# **Tip** If you need to load and resample your audio data on-the-fly, +# then you can use ``torchaudio.sox_effects.apply_effects_file`` with +# ``"rate"`` effect. +# +# **Note** ``apply_effects_file`` accepts file-like object or path-like +# object. Similar to ``torchaudio.load``, when the audio format cannot be +# detected from either file extension or header, you can provide +# ``format`` argument to tell what format the audio source is. +# +# **Note** This process is not differentiable. +# + +# Load the data +waveform1, sample_rate1 = get_sample(resample=16000) + +# Define effects +effects = [ + ["lowpass", "-1", "300"], # apply single-pole lowpass filter + ["speed", "0.8"], # reduce the speed + # This only changes sample rate, so it is necessary to + # add `rate` effect with original sample rate after this. + ["rate", f"{sample_rate1}"], + ["reverb", "-w"], # Reverbration gives some dramatic feeling +] + +# Apply effects +waveform2, sample_rate2 = torchaudio.sox_effects.apply_effects_tensor( + waveform1, sample_rate1, effects) + +plot_waveform(waveform1, sample_rate1, title="Original", xlim=(-.1, 3.2)) +plot_waveform(waveform2, sample_rate2, title="Effects Applied", xlim=(-.1, 3.2)) +print_stats(waveform1, sample_rate=sample_rate1, src="Original") +print_stats(waveform2, sample_rate=sample_rate2, src="Effects Applied") + ###################################################################### -# Let’s apply encode the waveform. +# Note that the number of frames and number of channels are different from +# the original after the effects. Let’s listen to the audio. Doesn’t it +# sound more dramatic? # -transformed = torchaudio.transforms.MuLawEncoding()(waveform) +plot_specgram(waveform1, sample_rate1, title="Original", xlim=(0, 3.04)) +play_audio(waveform1, sample_rate1) +plot_specgram(waveform2, sample_rate2, title="Effects Applied", xlim=(0, 3.04)) +play_audio(waveform2, sample_rate2) + -print("Shape of transformed waveform: {}".format(transformed.size())) +###################################################################### +# Simulating room reverbration +# ---------------------------- +# +# `Convolution +# reverb `__ is a +# technique used to make a clean audio data sound like in a different +# environment. +# +# Using Room Impulse Response (RIR), we can make a clean speech sound like +# uttered in a conference room. +# +# For this process, we need RIR data. The following data are from VOiCES +# dataset, but you can record one by your self. Just turn on microphone +# and clap you hands. +# -plt.figure() -plt.plot(transformed[0,:].numpy()) +sample_rate = 8000 + +rir_raw, _ = get_rir_sample(resample=sample_rate) + +plot_waveform(rir_raw, sample_rate, title="Room Impulse Response (raw)", ylim=None) +plot_specgram(rir_raw, sample_rate, title="Room Impulse Response (raw)") +play_audio(rir_raw, sample_rate) ###################################################################### -# And now decode. +# First, we need to clean up the RIR. We extract the main impulse, +# normalize the signal power, then flip the time axis. 
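The flip matters because ``torch.nn.functional.conv1d`` computes cross-correlation rather than convolution, so reversing the RIR along the time axis makes the upcoming ``conv1d`` call behave as a true convolution. A small toy check of that equivalence, using illustrative values rather than tutorial data:

import torch

signal = torch.tensor([[[1., 2., 3., 4.]]])  # shape (batch, channel, time)
kernel = torch.tensor([[[1., 0., -1.]]])     # a toy impulse response

# conv1d slides the kernel without reversing it (cross-correlation).
xcorr = torch.nn.functional.conv1d(signal, kernel)

# Flipping the kernel along the time axis gives the textbook convolution.
conv = torch.nn.functional.conv1d(signal, torch.flip(kernel, [2]))

print(xcorr)  # tensor([[[-2., -2.]]])
print(conv)   # tensor([[[2., 2.]]])
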
# -reconstructed = torchaudio.transforms.MuLawDecoding()(transformed) +rir = rir_raw[:, int(sample_rate*1.01):int(sample_rate*1.3)] +rir = rir / torch.norm(rir, p=2) +rir = torch.flip(rir, [1]) -print("Shape of recovered waveform: {}".format(reconstructed.size())) +print_stats(rir) +plot_waveform(rir, sample_rate, title="Room Impulse Response", ylim=None) -plt.figure() -plt.plot(reconstructed[0,:].numpy()) ###################################################################### -# We can finally compare the original waveform with its reconstructed -# version. +# Then we convolve the speech signal with the RIR filter. # -# Compute median relative difference -err = ((waveform-reconstructed).abs() / waveform.abs()).median() +speech, _ = get_speech_sample(resample=sample_rate) + +speech_ = torch.nn.functional.pad(speech, (rir.shape[1]-1, 0)) +augmented = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0] -print("Median relative difference between original and MuLaw reconstucted signals: {:.2%}".format(err)) +plot_waveform(speech, sample_rate, title="Original", ylim=None) +plot_waveform(augmented, sample_rate, title="RIR Applied", ylim=None) + +plot_specgram(speech, sample_rate, title="Original") +play_audio(speech, sample_rate) + +plot_specgram(augmented, sample_rate, title="RIR Applied") +play_audio(augmented, sample_rate) ###################################################################### -# Functional -# --------------- +# Adding background noise +# ----------------------- +# +# To add background noise to audio data, you can simply add audio Tensor +# and noise Tensor. A commonly way to adjust the intensity of noise is to +# change Signal-to-Noise Ratio (SNR). +# [`wikipedia `__] +# +# .. math:: +# +# +# \mathrm{SNR} = \frac{P_\mathrm{signal}}{P_\mathrm{noise}} +# +# .. math:: +# +# +# {\mathrm {SNR_{{dB}}}}=10\log _{{10}}\left({\mathrm {SNR}}\right) +# + +sample_rate = 8000 +speech, _ = get_speech_sample(resample=sample_rate) +noise, _ = get_noise_sample(resample=sample_rate) +noise = noise[:, :speech.shape[1]] + +plot_waveform(noise, sample_rate, title="Background noise") +plot_specgram(noise, sample_rate, title="Background noise") +play_audio(noise, sample_rate) + +speech_power = speech.norm(p=2) +noise_power = noise.norm(p=2) + +for snr_db in [20, 10, 3]: + snr = math.exp(snr_db / 10) + scale = snr * noise_power / speech_power + noisy_speech = (scale * speech + noise) / 2 + + plot_waveform(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") + plot_specgram(noisy_speech, sample_rate, title=f"SNR: {snr_db} [dB]") + play_audio(noisy_speech, sample_rate) + + + +###################################################################### +# Applying codec to Tensor object +# ------------------------------- # -# The transformations seen above rely on lower level stateless functions for their computations. -# These functions are available under ``torchaudio.functional``. The complete list is available -# `here `_ and includes: -# -# - **istft**: Inverse short time Fourier Transform. -# - **gain**: Applies amplification or attenuation to the whole waveform. -# - **dither**: Increases the perceived dynamic range of audio stored at a -# particular bit-depth. -# - **compute_deltas**: Compute delta coefficients of a tensor. -# - **equalizer_biquad**: Design biquad peaking equalizer filter and perform filtering. -# - **lowpass_biquad**: Design biquad lowpass filter and perform filtering. -# - **highpass_biquad**:Design biquad highpass filter and perform filtering. 
+# ``torchaudio.functional.apply_codec`` can apply codecs to Tensor object. # -# For example, let's try the `mu_law_encoding` functional: +# **Note** This process is not differentiable. +# + +waveform, sample_rate = get_speech_sample(resample=8000) + +plot_specgram(waveform, sample_rate, title="Original") +play_audio(waveform, sample_rate) -mu_law_encoding_waveform = torchaudio.functional.mu_law_encoding(waveform, quantization_channels=256) +configs = [ + ({"format": "wav", "encoding": 'ULAW', "bits_per_sample": 8}, "8 bit mu-law"), + ({"format": "gsm"}, "GSM-FR"), + ({"format": "mp3", "compression": -9}, "MP3"), + ({"format": "vorbis", "compression": -1}, "Vorbis"), +] +for param, title in configs: + augmented = F.apply_codec(waveform, sample_rate, **param) + plot_specgram(augmented, sample_rate, title=title) + play_audio(augmented, sample_rate) -print("Shape of transformed waveform: {}".format(mu_law_encoding_waveform.size())) -plt.figure() -plt.plot(mu_law_encoding_waveform[0,:].numpy()) ###################################################################### -# You can see how the output from ``torchaudio.functional.mu_law_encoding`` is the same as -# the output from ``torchaudio.transforms.MuLawEncoding``. -# -# Now let's experiment with a few of the other functionals and visualize their output. Taking our -# spectogram, we can compute it's deltas: +# Simulating a phone recoding +# --------------------------- +# +# Combining the previous techniques, we can simulate audio that sounds +# like a person talking over a phone in a echoey room with people talking +# in the background. +# + +sample_rate = 16000 +speech, _ = get_speech_sample(resample=sample_rate) + +plot_specgram(speech, sample_rate, title="Original") +play_audio(speech, sample_rate) + +# Apply RIR +rir, _ = get_rir_sample(resample=sample_rate, processed=True) +speech_ = torch.nn.functional.pad(speech, (rir.shape[1]-1, 0)) +speech = torch.nn.functional.conv1d(speech_[None, ...], rir[None, ...])[0] + +plot_specgram(speech, sample_rate, title="RIR Applied") +play_audio(speech, sample_rate) + +# Add background noise +# Because the noise is recorded in the actual environment, we consider that +# the noise contains the acoustic feature of the environment. Therefore, we add +# the noise after RIR application. +noise, _ = get_noise_sample(resample=sample_rate) +noise = noise[:, :speech.shape[1]] + +snr_db = 8 +scale = math.exp(snr_db / 10) * noise.norm(p=2) / speech.norm(p=2) +speech = (scale * speech + noise) / 2 + +plot_specgram(speech, sample_rate, title="BG noise added") +play_audio(speech, sample_rate) + +# Apply filtering and change sample rate +speech, sample_rate = torchaudio.sox_effects.apply_effects_tensor( + speech, + sample_rate, + effects=[ + ["lowpass", "4000"], + ["compand", "0.02,0.05", "-60,-60,-30,-10,-20,-8,-5,-8,-2,-8", "-8", "-7", "0.05"], + ["rate", "8000"], + ], +) + +plot_specgram(speech, sample_rate, title="Filtered") +play_audio(speech, sample_rate) + +# Apply telephony codec +speech = F.apply_codec(speech, sample_rate, format="gsm") + +plot_specgram(speech, sample_rate, title="GSM Codec Applied") +play_audio(speech, sample_rate) -computed = torchaudio.functional.compute_deltas(specgram.contiguous(), win_length=3) -print("Shape of computed deltas: {}".format(computed.shape)) -plt.figure() -plt.imshow(computed.log2()[0,:,:].detach().numpy(), cmap='gray') ###################################################################### -# We can take the original waveform and apply different effects to it. 
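As a closing note on the ``sox_effects`` API used by the helper functions in this section, the same kind of effect chain can also be applied while loading from disk, which is what the earlier tip about on-the-fly resampling refers to. A minimal sketch, assuming ``some_file.wav`` is any local audio file:

import torchaudio

# Illustrative path, replace with any local audio file.
path = "some_file.wav"

effects = [
    ["remix", "1"],     # mix down to a single channel
    ["rate", "16000"],  # resample to 16 kHz while loading
]
waveform, sample_rate = torchaudio.sox_effects.apply_effects_file(path, effects=effects)
print(waveform.shape, sample_rate)
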
-# +# Feature Extractions +# =================== +# +# ``torchaudio`` implements feature extractions commonly used in audio +# domain. They are available in ``torchaudio.functional`` and +# ``torchaudio.transforms``. +# +# ``functional`` module implements features as a stand alone functions. +# They are stateless. +# +# ``transforms`` module implements features in object-oriented manner, +# using implementations from ``functional`` and ``torch.nn.Module``. +# +# Because all the transforms are subclass of ``torch.nn.Module``, they can +# be serialized using TorchScript. +# +# For the complete list of available features, please refer to the +# documentation. In this tutorial, we will look into conversion between +# time domain and frequency domain (``Spectrogram``, ``GriffinLim``, +# ``MelSpectrogram``) and augmentation technique called SpecAugment. +# + + +###################################################################### +# Spectrogram +# ----------- +# +# To get the frequency representation of audio signal, you can use +# ``Spectrogram`` transform. +# + +waveform, sample_rate = get_speech_sample() + +n_fft = 1024 +win_length = None +hop_length = 512 + +# define transformation +spectrogram = T.Spectrogram( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + center=True, + pad_mode="reflect", + power=2.0, +) +# Perform transformation +spec = spectrogram(waveform) + +print_stats(spec) +plot_spectrogram(spec[0], title='torchaudio') + + + +###################################################################### +# GriffinLim +# ---------- +# +# To recover a waveform from spectrogram, you can use ``GriffinLim``. +# + +torch.random.manual_seed(0) +waveform, sample_rate = get_speech_sample() +plot_waveform(waveform, sample_rate, title="Original") +play_audio(waveform, sample_rate) + +n_fft = 1024 +win_length = None +hop_length = 512 + +spec = T.Spectrogram( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, +)(waveform) + +griffin_lim = T.GriffinLim( + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, +) +waveform = griffin_lim(spec) + +plot_waveform(waveform, sample_rate, title="Reconstructed") +play_audio(waveform, sample_rate) + + + +###################################################################### +# Mel Filter Bank +# --------------- +# +# ``torchaudio.functional.create_fb_matrix`` can generate the filter bank +# to convert frequency bins to Mel-scale bins. +# +# Since this function does not require input audio/features, there is no +# equivalent transform in ``torchaudio.transforms``. +# + +n_fft = 256 +n_mels = 64 +sample_rate = 6000 + +mel_filters = F.create_fb_matrix( + int(n_fft // 2 + 1), + n_mels=n_mels, + f_min=0., + f_max=sample_rate/2., + sample_rate=sample_rate, + norm='slaney' +) +plot_mel_fbank(mel_filters, "Mel Filter Bank - torchaudio") -gain_waveform = torchaudio.functional.gain(waveform, gain_db=5.0) -print("Min of gain_waveform: {}\nMax of gain_waveform: {}\nMean of gain_waveform: {}".format(gain_waveform.min(), gain_waveform.max(), gain_waveform.mean())) -dither_waveform = torchaudio.functional.dither(waveform) -print("Min of dither_waveform: {}\nMax of dither_waveform: {}\nMean of dither_waveform: {}".format(dither_waveform.min(), dither_waveform.max(), dither_waveform.mean())) ###################################################################### -# Another example of the capabilities in ``torchaudio.functional`` are applying filters to our -# waveform. 
Applying the lowpass biquad filter to our waveform will output a new waveform with -# the signal of the frequency modified. +# Comparison against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ +# +# As a comparison, here is the equivalent way to get the mel filter bank +# with ``librosa``. +# +# **Note** Currently, the result matches only when ``htk=True``. +# ``torchaudio`` does not support the equivalent of ``htk=False`` option. +# -lowpass_waveform = torchaudio.functional.lowpass_biquad(waveform, sample_rate, cutoff_freq=3000) +mel_filters_librosa = librosa.filters.mel( + sample_rate, + n_fft, + n_mels=n_mels, + fmin=0., + fmax=sample_rate/2., + norm='slaney', + htk=True, +).T -print("Min of lowpass_waveform: {}\nMax of lowpass_waveform: {}\nMean of lowpass_waveform: {}".format(lowpass_waveform.min(), lowpass_waveform.max(), lowpass_waveform.mean())) +plot_mel_fbank(mel_filters_librosa, "Mel Filter Bank - librosa") + +mse = torch.square(mel_filters - mel_filters_librosa).mean().item() +print('Mean Square Difference: ', mse) -plt.figure() -plt.plot(lowpass_waveform.t().numpy()) ###################################################################### -# We can also visualize a waveform with the highpass biquad filter. +# MelSpectrogram +# -------------- +# +# Mel-scale spectrogram is a combination of Spectrogram and mel scale +# conversion. In ``torchaudio``, there is a transform ``MelSpectrogram`` +# which is composed of ``Spectrogram`` and ``MelScale``. # -highpass_waveform = torchaudio.functional.highpass_biquad(waveform, sample_rate, cutoff_freq=2000) +waveform, sample_rate = get_speech_sample() + +n_fft = 1024 +win_length = None +hop_length = 512 +n_mels = 128 + +mel_spectrogram = T.MelSpectrogram( + sample_rate=sample_rate, + n_fft=n_fft, + win_length=win_length, + hop_length=hop_length, + center=True, + pad_mode="reflect", + power=2.0, + norm='slaney', + onesided=True, + n_mels=n_mels, +) -print("Min of highpass_waveform: {}\nMax of highpass_waveform: {}\nMean of highpass_waveform: {}".format(highpass_waveform.min(), highpass_waveform.max(), highpass_waveform.mean())) +melspec = mel_spectrogram(waveform) +plot_spectrogram( + melspec[0], title="MelSpectrogram - torchaudio", ylabel='mel freq') -plt.figure() -plt.plot(highpass_waveform.t().numpy()) ###################################################################### -# Migrating to torchaudio from Kaldi -# ---------------------------------- +# Comparison against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~~ # -# Users may be familiar with -# `Kaldi `_, a toolkit for speech -# recognition. ``torchaudio`` offers compatibility with it in -# ``torchaudio.kaldi_io``. It can indeed read from kaldi scp, or ark file -# or streams with: +# As a comparison, here is the equivalent way to get Mel-scale spectrogram +# with ``librosa``. # -# - read_vec_int_ark -# - read_vec_flt_scp -# - read_vec_flt_arkfile/stream -# - read_mat_scp -# - read_mat_ark +# **Note** Currently, the result matches only when ``htk=True``. +# ``torchaudio`` does not support the equivalent of ``htk=False`` option. # -# ``torchaudio`` provides Kaldi-compatible transforms for ``spectrogram``, -# ``fbank``, ``mfcc``, and ``resample_waveform with the benefit of GPU support, see -# `here `__ for more information. 
+ +melspec_librosa = librosa.feature.melspectrogram( + waveform.numpy()[0], + sr=sample_rate, + n_fft=n_fft, + hop_length=hop_length, + win_length=win_length, + center=True, + pad_mode="reflect", + power=2.0, + n_mels=n_mels, + norm='slaney', + htk=True, +) +plot_spectrogram( + melspec_librosa, title="MelSpectrogram - librosa", ylabel='mel freq') + +mse = torch.square(melspec - melspec_librosa).mean().item() +print('Mean Square Difference: ', mse) + + +###################################################################### +# MFCC +# ---- # -n_fft = 400.0 -frame_length = n_fft / sample_rate * 1000.0 -frame_shift = frame_length / 2.0 +waveform, sample_rate = get_speech_sample() + +n_fft = 2048 +win_length = None +hop_length = 512 +n_mels = 256 +n_mfcc = 256 -params = { - "channel": 0, - "dither": 0.0, - "window_type": "hanning", - "frame_length": frame_length, - "frame_shift": frame_shift, - "remove_dc_offset": False, - "round_to_power_of_two": False, - "sample_frequency": sample_rate, -} +mfcc_transform = T.MFCC( + sample_rate=sample_rate, + n_mfcc=n_mfcc, melkwargs={'n_fft': n_fft, 'n_mels': n_mels, 'hop_length': hop_length}) -specgram = torchaudio.compliance.kaldi.spectrogram(waveform, **params) +mfcc = mfcc_transform(waveform) -print("Shape of spectrogram: {}".format(specgram.size())) +plot_spectrogram(mfcc[0]) -plt.figure() -plt.imshow(specgram.t().numpy(), cmap='gray') ###################################################################### -# We also support computing the filterbank features from waveforms, -# matching Kaldi’s implementation. +# Comparing against librosa +# ~~~~~~~~~~~~~~~~~~~~~~~~~ # -fbank = torchaudio.compliance.kaldi.fbank(waveform, **params) +melspec = librosa.feature.melspectrogram( + y=waveform.numpy()[0], sr=sample_rate, n_fft=n_fft, + win_length=win_length, hop_length=hop_length, + n_mels=n_mels, htk=True, norm=None) -print("Shape of fbank: {}".format(fbank.size())) +mfcc_librosa = librosa.feature.mfcc( + S=librosa.core.spectrum.power_to_db(melspec), + n_mfcc=n_mfcc, dct_type=2, norm='ortho') -plt.figure() -plt.imshow(fbank.t().numpy(), cmap='gray') +plot_spectrogram(mfcc_librosa) + +mse = torch.square(mfcc - mfcc_librosa).mean().item() +print('Mean Square Difference: ', mse) ###################################################################### -# You can create mel frequency cepstral coefficients from a raw audio signal -# This matches the input/output of Kaldi’s compute-mfcc-feats. +# Pitch +# ----- +# + +waveform, sample_rate = get_speech_sample() + +pitch = F.detect_pitch_frequency(waveform, sample_rate) +plot_pitch(waveform, sample_rate, pitch) +play_audio(waveform, sample_rate) + + +###################################################################### +# Kaldi Pitch (beta) +# ------------------ +# +# Kaldi Pitch feature [1] is pitch detection mechanism tuned for ASR +# application. This is a beta feature in torchaudio, and only +# ``functional`` form is available. +# +# 1. A pitch extraction algorithm tuned for automatic speech recognition +# +# Ghahremani, B. BabaAli, D. Povey, K. Riedhammer, J. Trmal and S. +# Khudanpur +# +# 2014 IEEE International Conference on Acoustics, Speech and Signal +# Processing (ICASSP), Florence, 2014, pp. 2494-2498, doi: +# 10.1109/ICASSP.2014.6854049. 
+# [`abstract `__], +# [`paper `__] # -mfcc = torchaudio.compliance.kaldi.mfcc(waveform, **params) +waveform, sample_rate = get_speech_sample(resample=16000) -print("Shape of mfcc: {}".format(mfcc.size())) +pitch_feature = F.compute_kaldi_pitch(waveform, sample_rate) +pitch, nfcc = pitch_feature[..., 0], pitch_feature[..., 1] -plt.figure() -plt.imshow(mfcc.t().numpy(), cmap='gray') +plot_kaldi_pitch(waveform, sample_rate, pitch, nfcc) +play_audio(waveform, sample_rate) ###################################################################### -# Available Datasets -# ----------------- +# Feature Augmentation +# ==================== # -# If you do not want to create your own dataset to train your model, ``torchaudio`` offers a -# unified dataset interface. This interface supports lazy-loading of files to memory, download -# and extract functions, and datasets to build models. + + +###################################################################### +# SpecAugment +# ----------- +# +# `SpecAugment `__ +# is a popular augmentation technique applied on spectrogram. # -# The datasets ``torchaudio`` currently supports are: -# -# - **VCTK**: Speech data uttered by 109 native speakers of English with various accents -# (`Read more here `_). -# - **Yesno**: Sixty recordings of one individual saying yes or no in Hebrew; each -# recording is eight words long (`Read more here `_). -# - **Common Voice**: An open source, multi-language dataset of voices that anyone can use -# to train speech-enabled applications (`Read more here `_). -# - **LibriSpeech**: Large-scale (1000 hours) corpus of read English speech (`Read more here `_). +# ``torchaudio`` implements ``TimeStrech``, ``TimeMasking`` and +# ``FrequencyMasking``. # -yesno_data = torchaudio.datasets.YESNO('./', download=True) -# A data point in Yesno is a tuple (waveform, sample_rate, labels) where labels is a list of integers with 1 for yes and 0 for no. +###################################################################### +# TimeStrech +# ~~~~~~~~~~ +# + +spec = get_spectrogram(power=None) +strech = T.TimeStretch() + +rate = 1.2 +spec_ = strech(spec, rate) +plot_spectrogram(F.complex_norm(spec_[0]), title=f"Stretched x{rate}", aspect='equal', xmax=304) + +plot_spectrogram(F.complex_norm(spec[0]), title="Original", aspect='equal', xmax=304) + +rate = 0.9 +spec_ = strech(spec, rate) +plot_spectrogram(F.complex_norm(spec_[0]), title=f"Stretched x{rate}", aspect='equal', xmax=304) + + +###################################################################### +# TimeMasking +# ~~~~~~~~~~~ +# + +torch.random.manual_seed(4) -# Pick data point number 3 to see an example of the the yesno_data: -n = 3 -waveform, sample_rate, labels = yesno_data[n] +spec = get_spectrogram() +plot_spectrogram(spec[0], title="Original") -print("Waveform: {}\nSample rate: {}\nLabels: {}".format(waveform, sample_rate, labels)) +masking = T.TimeMasking(time_mask_param=80) +spec = masking(spec) -plt.figure() -plt.plot(waveform.t().numpy()) +plot_spectrogram(spec[0], title="Masked along time axis") ###################################################################### -# Now, whenever you ask for a sound file from the dataset, it is loaded in memory only when you ask for it. -# Meaning, the dataset only loads and keeps in memory the items that you want and use, saving on memory. 
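Because each item is only loaded when it is requested, such datasets combine naturally with ``torch.utils.data.DataLoader``. Audio clips usually differ in length, so batching needs a collate function; the one below is a rough sketch with made-up names, assuming every item is a ``(waveform, sample_rate, labels)`` tuple with a mono waveform, as in ``YESNO``:

import torch
from torch.utils.data import DataLoader

def collate_audio(batch):
    # Zero-pad every clip in the batch to the length of the longest one.
    waveforms, sample_rates, labels = zip(*batch)
    max_len = max(w.shape[1] for w in waveforms)
    padded = torch.zeros(len(waveforms), 1, max_len)
    for i, w in enumerate(waveforms):
        padded[i, :, :w.shape[1]] = w
    return padded, torch.tensor(sample_rates), list(labels)

# Example usage with a dataset instance such as YESNO:
# loader = DataLoader(dataset, batch_size=4, shuffle=True, collate_fn=collate_audio)
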
-# +# FrequencyMasking +# ~~~~~~~~~~~~~~~~ +# + +torch.random.manual_seed(4) + +spec = get_spectrogram() +plot_spectrogram(spec[0], title="Original") + +masking = T.FrequencyMasking(freq_mask_param=80) +spec = masking(spec) + +plot_spectrogram(spec[0], title="Masked along frequency axis") + ###################################################################### -# Conclusion -# ---------- +# Datasets +# ======== +# +# ``torchaudio`` provides easy access to common, publicly accessible +# datasets. Please checkout the official documentation for the list of +# available datasets. # -# We used an example raw audio signal, or waveform, to illustrate how to -# open an audio file using ``torchaudio``, and how to pre-process, -# transform, and apply functions to such waveform. We also demonstrated how -# to use familiar Kaldi functions, as well as utilize built-in datasets to -# construct our models. Given that ``torchaudio`` is built on PyTorch, -# these techniques can be used as building blocks for more advanced audio -# applications, such as speech recognition, while leveraging GPUs. +# Here, we take ``YESNO`` dataset and look into how to use it. # + +YESNO_DOWNLOAD_PROCESS.join() + +dataset = torchaudio.datasets.YESNO(YESNO_DATASET_PATH, download=True) + +for i in [1, 3, 5]: + waveform, sample_rate, label = dataset[i] + plot_specgram(waveform, sample_rate, title=f"Sample {i}: {label}") + play_audio(waveform, sample_rate) diff --git a/requirements.txt b/requirements.txt index 83b64c00064..d55369996a0 100644 --- a/requirements.txt +++ b/requirements.txt @@ -23,8 +23,11 @@ ray[tune] ipython # to run examples +boto3 pandas +requests scikit-image +scipy pillow==8.1.0 wget gym From b5d9d7c14ef7d939017447f967740b7280e898bc Mon Sep 17 00:00:00 2001 From: Guanheng George Zhang <6156351+zhangguanheng66@users.noreply.github.com> Date: Thu, 4 Mar 2021 00:07:18 -0500 Subject: [PATCH 03/13] [1.8 release] Switch to the new datasets in torchtext 0.9.0 release - text classification tutorial (#1352) * switch to the new dataset API * checkpoint * checkpoint * checkpoint * update docs * checkpoint * switch to legacy vocab * update to follow the master API * checkpoint * checkpoint * address reviewer's comments Co-authored-by: Guanheng Zhang Co-authored-by: Brian Johnson --- .../text_sentiment_ngrams_tutorial.py | 503 ++++++++++-------- 1 file changed, 276 insertions(+), 227 deletions(-) diff --git a/beginner_source/text_sentiment_ngrams_tutorial.py b/beginner_source/text_sentiment_ngrams_tutorial.py index d842a058325..67108e69877 100644 --- a/beginner_source/text_sentiment_ngrams_tutorial.py +++ b/beginner_source/text_sentiment_ngrams_tutorial.py @@ -1,66 +1,133 @@ """ -Text Classification with TorchText +Text classification with the torchtext library ================================== -This tutorial shows how to use the text classification datasets -in ``torchtext``, including +In this tutorial, we will show how to use the torchtext library to build the dataset for the text classification analysis. 
Users will have the flexibility to -:: + - Access to the raw data as an iterator + - Build data processing pipeline to convert the raw text strings into ``torch.Tensor`` that can be used to train the model + - Shuffle and iterate the data with `torch.utils.data.DataLoader `__ +""" - - AG_NEWS, - - SogouNews, - - DBpedia, - - YelpReviewPolarity, - - YelpReviewFull, - - YahooAnswers, - - AmazonReviewPolarity, - - AmazonReviewFull -This example shows how to train a supervised learning algorithm for -classification using one of these ``TextClassification`` datasets. +###################################################################### +# Access to the raw dataset iterators +# ----------------------------------- +# +# The torchtext library provides a few raw dataset iterators, which yield the raw text strings. For example, the ``AG_NEWS`` dataset iterators yield the raw data as a tuple of label and text. -Load data with ngrams ---------------------- +import torch +from torchtext.datasets import AG_NEWS +train_iter = AG_NEWS(split='train') -A bag of ngrams feature is applied to capture some partial information -about the local word order. In practice, bi-gram or tri-gram are applied -to provide more benefits as word groups than only one word. An example: -:: +###################################################################### +# :: +# +# next(train_iter) +# >>> (3, "Wall St. Bears Claw Back Into the Black (Reuters) Reuters - +# Short-sellers, Wall Street's dwindling\\band of ultra-cynics, are seeing green +# again.") +# +# next(train_iter) +# >>> (3, 'Carlyle Looks Toward Commercial Aerospace (Reuters) Reuters - Private +# investment firm Carlyle Group,\\which has a reputation for making well-timed +# and occasionally\\controversial plays in the defense industry, has quietly +# placed\\its bets on another part of the market.') +# +# next(train_iter) +# >>> (3, "Oil and Economy Cloud Stocks' Outlook (Reuters) Reuters - Soaring +# crude prices plus worries\\about the economy and the outlook for earnings are +# expected to\\hang over the stock market next week during the depth of +# the\\summer doldrums.") +# - "load data with ngrams" - Bi-grams results: "load data", "data with", "with ngrams" - Tri-grams results: "load data with", "data with ngrams" -``TextClassification`` Dataset supports the ngrams method. By setting -ngrams to 2, the example text in the dataset will be a list of single -words plus bi-grams string. +###################################################################### +# Prepare data processing pipelines +# --------------------------------- +# +# We have revisited the very basic components of the torchtext library, including vocab, word vectors, tokenizer. Those are the basic data processing building blocks for raw text string. +# +# Here is an example for typical NLP data processing with tokenizer and vocabulary. The first step is to build a vocabulary with the raw training dataset. Users can have a customized vocab by setting up arguments in the constructor of the Vocab class. For example, the minimum frequency ``min_freq`` for the tokens to be included. 
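As a toy illustration of that argument, here is a rough sketch, with made-up tokens, of how raising ``min_freq`` shrinks the vocabulary; the exact sizes also depend on the special tokens such as ``<unk>`` and ``<pad>`` that ``Vocab`` adds by default:

from collections import Counter
from torchtext.vocab import Vocab

toy_counter = Counter(["hello", "hello", "world", "rare"])

v_all = Vocab(toy_counter, min_freq=1)   # keeps every token seen at least once
v_freq = Vocab(toy_counter, min_freq=2)  # keeps only tokens seen at least twice

print(len(v_all), len(v_freq))  # the second vocabulary is smaller
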
-""" -import torch -import torchtext -from torchtext.datasets import text_classification -NGRAMS = 2 -import os -if not os.path.isdir('./.data'): - os.mkdir('./.data') -train_dataset, test_dataset = text_classification.DATASETS['AG_NEWS']( - root='./.data', ngrams=NGRAMS, vocab=None) -BATCH_SIZE = 16 +from torchtext.data.utils import get_tokenizer +from collections import Counter +from torchtext.vocab import Vocab + +tokenizer = get_tokenizer('basic_english') +train_iter = AG_NEWS(split='train') +counter = Counter() +for (label, line) in train_iter: + counter.update(tokenizer(line)) +vocab = Vocab(counter, min_freq=1) + + +###################################################################### +# The vocabulary block converts a list of tokens into integers. +# +# :: +# +# [vocab[token] for token in ['here', 'is', 'an', 'example']] +# >>> [476, 22, 31, 5298] +# +# Prepare the text processing pipeline with the tokenizer and vocabulary. The text and label pipelines will be used to process the raw data strings from the dataset iterators. + +text_pipeline = lambda x: [vocab[token] for token in tokenizer(x)] +label_pipeline = lambda x: int(x) - 1 + + +###################################################################### +# The text pipeline converts a text string into a list of integers based on the lookup table defined in the vocabulary. The label pipeline converts the label into integers. For example, +# +# :: +# +# text_pipeline('here is the an example') +# >>> [475, 21, 2, 30, 5286] +# label_pipeline('10') +# >>> 9 +# + + + +###################################################################### +# Generate data batch and iterator +# -------------------------------- +# +# `torch.utils.data.DataLoader `__ +# is recommended for PyTorch users (a tutorial is `here `__). +# It works with a map-style dataset that implements the ``getitem()`` and ``len()`` protocols, and represents a map from indices/keys to data samples. It also works with an iterable datasets with the shuffle argumnent of ``False``. +# +# Before sending to the model, ``collate_fn`` function works on a batch of samples generated from ``DataLoader``. The input to ``collate_fn`` is a batch of data with the batch size in ``DataLoader``, and ``collate_fn`` processes them according to the data processing pipelines declared previouly. Pay attention here and make sure that ``collate_fn`` is declared as a top level def. This ensures that the function is available in each worker. +# +# In this example, the text entries in the original data batch input are packed into a list and concatenated as a single tensor for the input of ``nn.EmbeddingBag``. The offset is a tensor of delimiters to represent the beginning index of the individual sequence in the text tensor. Label is a tensor saving the labels of indidividual text entries. 
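To make the offsets convention concrete, here is a tiny hand-made example: two sentences of 3 and 2 token ids are packed into one flat tensor, ``offsets`` marks where each sentence begins, and ``nn.EmbeddingBag`` returns one pooled embedding per sentence:

import torch
from torch import nn

text = torch.tensor([4, 1, 7, 2, 9])  # sentence 0 = [4, 1, 7], sentence 1 = [2, 9]
offsets = torch.tensor([0, 3])        # start index of each sentence in `text`

embedding_bag = nn.EmbeddingBag(num_embeddings=10, embedding_dim=5, mode="mean")
pooled = embedding_bag(text, offsets)
print(pooled.shape)  # torch.Size([2, 5]), one mean embedding per sentence
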
+ + +from torch.utils.data import DataLoader device = torch.device("cuda" if torch.cuda.is_available() else "cpu") +def collate_batch(batch): + label_list, text_list, offsets = [], [], [0] + for (_label, _text) in batch: + label_list.append(label_pipeline(_label)) + processed_text = torch.tensor(text_pipeline(_text), dtype=torch.int64) + text_list.append(processed_text) + offsets.append(processed_text.size(0)) + label_list = torch.tensor(label_list, dtype=torch.int64) + offsets = torch.tensor(offsets[:-1]).cumsum(dim=0) + text_list = torch.cat(text_list) + return label_list.to(device), text_list.to(device), offsets.to(device) + +train_iter = AG_NEWS(split='train') +dataloader = DataLoader(train_iter, batch_size=8, shuffle=False, collate_fn=collate_batch) + ###################################################################### # Define the model # ---------------- # -# The model is composed of the -# `EmbeddingBag `__ -# layer and the linear layer (see the figure below). ``nn.EmbeddingBag`` -# computes the mean value of a “bag” of embeddings. The text entries here -# have different lengths. ``nn.EmbeddingBag`` requires no padding here -# since the text lengths are saved in offsets. +# The model is composed of the `nn.EmbeddingBag `__ layer plus a linear layer for the classification purpose. ``nn.EmbeddingBag`` with the default mode of "mean" computes the mean value of a “bag” of embeddings. Although the text entries here have different lengths, nn.EmbeddingBag module requires no padding here since the text lengths are saved in offsets. # # Additionally, since ``nn.EmbeddingBag`` accumulates the average across # the embeddings on the fly, ``nn.EmbeddingBag`` can enhance the @@ -69,11 +136,12 @@ # .. image:: ../_static/img/text_sentiment_ngrams_model.png # -import torch.nn as nn -import torch.nn.functional as F -class TextSentiment(nn.Module): +from torch import nn + +class TextClassificationModel(nn.Module): + def __init__(self, vocab_size, embed_dim, num_class): - super().__init__() + super(TextClassificationModel, self).__init__() self.embedding = nn.EmbeddingBag(vocab_size, embed_dim, sparse=True) self.fc = nn.Linear(embed_dim, num_class) self.init_weights() @@ -93,8 +161,7 @@ def forward(self, text, offsets): # Initiate an instance # -------------------- # -# The AG_NEWS dataset has four labels and therefore the number of classes -# is four. +# The ``AG_NEWS`` dataset has four labels and therefore the number of classes is four. # # :: # @@ -103,51 +170,14 @@ def forward(self, text, offsets): # 3 : Business # 4 : Sci/Tec # -# The vocab size is equal to the length of vocab (including single word -# and ngrams). The number of classes is equal to the number of labels, -# which is four in AG_NEWS case. +# We build a model with the embedding dimension of 64. The vocab size is equal to the length of the vocabulary instance. The number of classes is equal to the number of labels, # -VOCAB_SIZE = len(train_dataset.get_vocab()) -EMBED_DIM = 32 -NUN_CLASS = len(train_dataset.get_labels()) -model = TextSentiment(VOCAB_SIZE, EMBED_DIM, NUN_CLASS).to(device) - - -###################################################################### -# Functions used to generate batch -# -------------------------------- -# - - -###################################################################### -# Since the text entries have different lengths, a custom function -# generate_batch() is used to generate data batches and offsets. The -# function is passed to ``collate_fn`` in ``torch.utils.data.DataLoader``. 
-# The input to ``collate_fn`` is a list of tensors with the size of -# batch_size, and the ``collate_fn`` function packs them into a -# mini-batch. Pay attention here and make sure that ``collate_fn`` is -# declared as a top level def. This ensures that the function is available -# in each worker. -# -# The text entries in the original data batch input are packed into a list -# and concatenated as a single tensor as the input of ``nn.EmbeddingBag``. -# The offsets is a tensor of delimiters to represent the beginning index -# of the individual sequence in the text tensor. Label is a tensor saving -# the labels of individual text entries. -# - -def generate_batch(batch): - label = torch.tensor([entry[0] for entry in batch]) - text = [entry[1] for entry in batch] - offsets = [0] + [len(entry) for entry in text] - # torch.Tensor.cumsum returns the cumulative sum - # of elements in the dimension dim. - # torch.Tensor([1.0, 2.0, 3.0]).cumsum(dim=0) - - offsets = torch.tensor(offsets[:-1]).cumsum(dim=0) - text = torch.cat(text) - return text, offsets, label +train_iter = AG_NEWS(split='train') +num_class = len(set([label for (label, text) in train_iter])) +vocab_size = len(vocab) +emsize = 64 +model = TextClassificationModel(vocab_size, emsize, num_class).to(device) ###################################################################### @@ -156,144 +186,170 @@ def generate_batch(batch): # -###################################################################### -# `torch.utils.data.DataLoader `__ -# is recommended for PyTorch users, and it makes data loading in parallel -# easily (a tutorial is -# `here `__). -# We use ``DataLoader`` here to load AG_NEWS datasets and send it to the -# model for training/validation. -# - -from torch.utils.data import DataLoader +import time -def train_func(sub_train_): +def train(dataloader): + model.train() + total_acc, total_count = 0, 0 + log_interval = 500 + start_time = time.time() - # Train the model - train_loss = 0 - train_acc = 0 - data = DataLoader(sub_train_, batch_size=BATCH_SIZE, shuffle=True, - collate_fn=generate_batch) - for i, (text, offsets, cls) in enumerate(data): + for idx, (label, text, offsets) in enumerate(dataloader): optimizer.zero_grad() - text, offsets, cls = text.to(device), offsets.to(device), cls.to(device) - output = model(text, offsets) - loss = criterion(output, cls) - train_loss += loss.item() + predited_label = model(text, offsets) + loss = criterion(predited_label, label) loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), 0.1) optimizer.step() - train_acc += (output.argmax(1) == cls).sum().item() - - # Adjust the learning rate - scheduler.step() + total_acc += (predited_label.argmax(1) == label).sum().item() + total_count += label.size(0) + if idx % log_interval == 0 and idx > 0: + elapsed = time.time() - start_time + print('| epoch {:3d} | {:5d}/{:5d} batches ' + '| accuracy {:8.3f}'.format(epoch, idx, len(dataloader), + total_acc/total_count)) + total_acc, total_count = 0, 0 + start_time = time.time() + +def evaluate(dataloader): + model.eval() + total_acc, total_count = 0, 0 - return train_loss / len(sub_train_), train_acc / len(sub_train_) - -def test(data_): - loss = 0 - acc = 0 - data = DataLoader(data_, batch_size=BATCH_SIZE, collate_fn=generate_batch) - for text, offsets, cls in data: - text, offsets, cls = text.to(device), offsets.to(device), cls.to(device) - with torch.no_grad(): - output = model(text, offsets) - loss = criterion(output, cls) - loss += loss.item() - acc += (output.argmax(1) == 
cls).sum().item() - - return loss / len(data_), acc / len(data_) + with torch.no_grad(): + for idx, (label, text, offsets) in enumerate(dataloader): + predited_label = model(text, offsets) + loss = criterion(predited_label, label) + total_acc += (predited_label.argmax(1) == label).sum().item() + total_count += label.size(0) + return total_acc/total_count ###################################################################### # Split the dataset and run the model # ----------------------------------- # -# Since the original AG_NEWS has no valid dataset, we split the training +# Since the original ``AG_NEWS`` has no valid dataset, we split the training # dataset into train/valid sets with a split ratio of 0.95 (train) and # 0.05 (valid). Here we use # `torch.utils.data.dataset.random_split `__ # function in PyTorch core library. # # `CrossEntropyLoss `__ -# criterion combines nn.LogSoftmax() and nn.NLLLoss() in a single class. +# criterion combines ``nn.LogSoftmax()`` and ``nn.NLLLoss()`` in a single class. # It is useful when training a classification problem with C classes. # `SGD `__ -# implements stochastic gradient descent method as optimizer. The initial -# learning rate is set to 4.0. +# implements stochastic gradient descent method as the optimizer. The initial +# learning rate is set to 5.0. # `StepLR `__ # is used here to adjust the learning rate through epochs. # -import time -from torch.utils.data.dataset import random_split -N_EPOCHS = 5 -min_valid_loss = float('inf') - -criterion = torch.nn.CrossEntropyLoss().to(device) -optimizer = torch.optim.SGD(model.parameters(), lr=4.0) -scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1, gamma=0.9) - -train_len = int(len(train_dataset) * 0.95) -sub_train_, sub_valid_ = \ - random_split(train_dataset, [train_len, len(train_dataset) - train_len]) - -for epoch in range(N_EPOCHS): - - start_time = time.time() - train_loss, train_acc = train_func(sub_train_) - valid_loss, valid_acc = test(sub_valid_) - secs = int(time.time() - start_time) - mins = secs / 60 - secs = secs % 60 - - print('Epoch: %d' %(epoch + 1), " | time in %d minutes, %d seconds" %(mins, secs)) - print(f'\tLoss: {train_loss:.4f}(train)\t|\tAcc: {train_acc * 100:.1f}%(train)') - print(f'\tLoss: {valid_loss:.4f}(valid)\t|\tAcc: {valid_acc * 100:.1f}%(valid)') +from torch.utils.data.dataset import random_split +# Hyperparameters +EPOCHS = 10 # epoch +LR = 5 # learning rate +BATCH_SIZE = 64 # batch size for training + +criterion = torch.nn.CrossEntropyLoss() +optimizer = torch.optim.SGD(model.parameters(), lr=LR) +scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.1) +total_accu = None +train_iter, test_iter = AG_NEWS() +train_dataset = list(train_iter) +test_dataset = list(test_iter) +num_train = int(len(train_dataset) * 0.95) +split_train_, split_valid_ = \ + random_split(train_dataset, [num_train, len(train_dataset) - num_train]) + +train_dataloader = DataLoader(split_train_, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) +valid_dataloader = DataLoader(split_valid_, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) +test_dataloader = DataLoader(test_dataset, batch_size=BATCH_SIZE, + shuffle=True, collate_fn=collate_batch) + +for epoch in range(1, EPOCHS + 1): + epoch_start_time = time.time() + train(train_dataloader) + accu_val = evaluate(valid_dataloader) + if total_accu is not None and total_accu > accu_val: + scheduler.step() + else: + total_accu = accu_val + print('-' * 59) + print('| end of epoch {:3d} | time: 
{:5.2f}s | ' + 'valid accuracy {:8.3f} '.format(epoch, + time.time() - epoch_start_time, + accu_val)) + print('-' * 59) ###################################################################### -# Running the model on GPU with the following information: -# -# Epoch: 1 \| time in 0 minutes, 11 seconds +# Running the model on GPU with the following printout: # # :: # -# Loss: 0.0263(train) | Acc: 84.5%(train) -# Loss: 0.0001(valid) | Acc: 89.0%(valid) -# -# -# Epoch: 2 \| time in 0 minutes, 10 seconds -# -# :: -# -# Loss: 0.0119(train) | Acc: 93.6%(train) -# Loss: 0.0000(valid) | Acc: 89.6%(valid) -# -# -# Epoch: 3 \| time in 0 minutes, 9 seconds -# -# :: -# -# Loss: 0.0069(train) | Acc: 96.4%(train) -# Loss: 0.0000(valid) | Acc: 90.5%(valid) -# -# -# Epoch: 4 \| time in 0 minutes, 11 seconds -# -# :: -# -# Loss: 0.0038(train) | Acc: 98.2%(train) -# Loss: 0.0000(valid) | Acc: 90.4%(valid) -# -# -# Epoch: 5 \| time in 0 minutes, 11 seconds -# -# :: -# -# Loss: 0.0022(train) | Acc: 99.0%(train) -# Loss: 0.0000(valid) | Acc: 91.0%(valid) -# +# | epoch 1 | 500/ 1782 batches | accuracy 0.684 +# | epoch 1 | 1000/ 1782 batches | accuracy 0.852 +# | epoch 1 | 1500/ 1782 batches | accuracy 0.877 +# ----------------------------------------------------------- +# | end of epoch 1 | time: 8.33s | valid accuracy 0.867 +# ----------------------------------------------------------- +# | epoch 2 | 500/ 1782 batches | accuracy 0.895 +# | epoch 2 | 1000/ 1782 batches | accuracy 0.900 +# | epoch 2 | 1500/ 1782 batches | accuracy 0.903 +# ----------------------------------------------------------- +# | end of epoch 2 | time: 8.18s | valid accuracy 0.890 +# ----------------------------------------------------------- +# | epoch 3 | 500/ 1782 batches | accuracy 0.914 +# | epoch 3 | 1000/ 1782 batches | accuracy 0.914 +# | epoch 3 | 1500/ 1782 batches | accuracy 0.916 +# ----------------------------------------------------------- +# | end of epoch 3 | time: 8.20s | valid accuracy 0.897 +# ----------------------------------------------------------- +# | epoch 4 | 500/ 1782 batches | accuracy 0.926 +# | epoch 4 | 1000/ 1782 batches | accuracy 0.924 +# | epoch 4 | 1500/ 1782 batches | accuracy 0.921 +# ----------------------------------------------------------- +# | end of epoch 4 | time: 8.18s | valid accuracy 0.895 +# ----------------------------------------------------------- +# | epoch 5 | 500/ 1782 batches | accuracy 0.938 +# | epoch 5 | 1000/ 1782 batches | accuracy 0.935 +# | epoch 5 | 1500/ 1782 batches | accuracy 0.937 +# ----------------------------------------------------------- +# | end of epoch 5 | time: 8.16s | valid accuracy 0.902 +# ----------------------------------------------------------- +# | epoch 6 | 500/ 1782 batches | accuracy 0.939 +# | epoch 6 | 1000/ 1782 batches | accuracy 0.939 +# | epoch 6 | 1500/ 1782 batches | accuracy 0.938 +# ----------------------------------------------------------- +# | end of epoch 6 | time: 8.16s | valid accuracy 0.906 +# ----------------------------------------------------------- +# | epoch 7 | 500/ 1782 batches | accuracy 0.941 +# | epoch 7 | 1000/ 1782 batches | accuracy 0.939 +# | epoch 7 | 1500/ 1782 batches | accuracy 0.939 +# ----------------------------------------------------------- +# | end of epoch 7 | time: 8.19s | valid accuracy 0.903 +# ----------------------------------------------------------- +# | epoch 8 | 500/ 1782 batches | accuracy 0.942 +# | epoch 8 | 1000/ 1782 batches | accuracy 0.941 +# | epoch 8 | 1500/ 1782 batches | accuracy 0.942 +# 
----------------------------------------------------------- +# | end of epoch 8 | time: 8.16s | valid accuracy 0.904 +# ----------------------------------------------------------- +# | epoch 9 | 500/ 1782 batches | accuracy 0.942 +# | epoch 9 | 1000/ 1782 batches | accuracy 0.941 +# | epoch 9 | 1500/ 1782 batches | accuracy 0.942 +# ----------------------------------------------------------- +# end of epoch 9 | time: 8.16s | valid accuracy 0.904 +# ----------------------------------------------------------- +# | epoch 10 | 500/ 1782 batches | accuracy 0.940 +# | epoch 10 | 1000/ 1782 batches | accuracy 0.942 +# | epoch 10 | 1500/ 1782 batches | accuracy 0.942 +# ----------------------------------------------------------- +# | end of epoch 10 | time: 8.15s | valid accuracy 0.904 +# ----------------------------------------------------------- ###################################################################### @@ -301,17 +357,20 @@ def test(data_): # ------------------------------------ # -print('Checking the results of test dataset...') -test_loss, test_acc = test(test_dataset) -print(f'\tLoss: {test_loss:.4f}(test)\t|\tAcc: {test_acc * 100:.1f}%(test)') ###################################################################### -# Checking the results of test dataset… +# Checking the results of the test dataset… + +print('Checking the results of test dataset.') +accu_test = evaluate(test_dataloader) +print('test accuracy {:8.3f}'.format(accu_test)) + +################################################ # # :: # -# Loss: 0.0237(test) | Acc: 90.5%(test) +# test accuracy 0.906 # @@ -319,25 +378,18 @@ def test(data_): # Test on a random news # --------------------- # -# Use the best model so far and test a golf news. The label information is -# available -# `here `__. +# Use the best model so far and test a golf news. # -import re -from torchtext.data.utils import ngrams_iterator -from torchtext.data.utils import get_tokenizer -ag_news_label = {1 : "World", - 2 : "Sports", - 3 : "Business", - 4 : "Sci/Tec"} +ag_news_label = {1: "World", + 2: "Sports", + 3: "Business", + 4: "Sci/Tec"} -def predict(text, model, vocab, ngrams): - tokenizer = get_tokenizer("basic_english") +def predict(text, text_pipeline): with torch.no_grad(): - text = torch.tensor([vocab[token] - for token in ngrams_iterator(tokenizer(text), ngrams)]) + text = torch.tensor(text_pipeline(text)) output = model(text, torch.tensor([0])) return output.argmax(1).item() + 1 @@ -353,17 +405,14 @@ def predict(text, model, vocab, ngrams): was even more impressive considering he’d never played the \ front nine at TPC Southwind." -vocab = train_dataset.get_vocab() model = model.to("cpu") -print("This is a %s news" %ag_news_label[predict(ex_text_str, model, vocab, 2)]) - -###################################################################### -# This is a Sports news -# +print("This is a %s news" %ag_news_label[predict(ex_text_str, text_pipeline)]) -###################################################################### -# You can find the code examples displayed in this note -# `here `__. 
+################################################ +# +# :: +# +# This is a Sports news # From 68ca41de70a3842b601c19b5aed9eb61718bfa75 Mon Sep 17 00:00:00 2001 From: Guanheng George Zhang <6156351+zhangguanheng66@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:32:46 -0500 Subject: [PATCH 04/13] [1.8 release] Switch to LM dataset in torchtext 0.9.0 release (#1349) * switch to raw text dataset in torchtext 0.9.0 release * follow the new API in torchtext master Co-authored-by: Guanheng Zhang Co-authored-by: Brian Johnson --- beginner_source/transformer_tutorial.py | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) diff --git a/beginner_source/transformer_tutorial.py b/beginner_source/transformer_tutorial.py index 0c5a484c457..78fbb68ccaa 100644 --- a/beginner_source/transformer_tutorial.py +++ b/beginner_source/transformer_tutorial.py @@ -145,25 +145,27 @@ def forward(self, x): import io import torch -from torchtext.utils import download_from_url, extract_archive +from torchtext.datasets import WikiText2 from torchtext.data.utils import get_tokenizer -from torchtext.vocab import build_vocab_from_iterator +from collections import Counter +from torchtext.vocab import Vocab -url = 'https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip' -test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url)) +train_iter = WikiText2(split='train') tokenizer = get_tokenizer('basic_english') -vocab = build_vocab_from_iterator(map(tokenizer, - iter(io.open(train_filepath, - encoding="utf8")))) +counter = Counter() +for line in train_iter: + counter.update(tokenizer(line)) +vocab = Vocab(counter) def data_process(raw_text_iter): data = [torch.tensor([vocab[token] for token in tokenizer(item)], dtype=torch.long) for item in raw_text_iter] return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) -train_data = data_process(iter(io.open(train_filepath, encoding="utf8"))) -val_data = data_process(iter(io.open(valid_filepath, encoding="utf8"))) -test_data = data_process(iter(io.open(test_filepath, encoding="utf8"))) +train_iter, val_iter, test_iter = WikiText2() +train_data = data_process(train_iter) +val_data = data_process(val_iter) +test_data = data_process(test_iter) device = torch.device("cuda" if torch.cuda.is_available() else "cpu") From 63bfc84a84a8a9158c1d6151568e0d7dc7e2e5da Mon Sep 17 00:00:00 2001 From: James Reed Date: Thu, 4 Mar 2021 06:41:29 -0800 Subject: [PATCH 05/13] [WIP][FX] CPU Performance Profiling with FX (#1319) Co-authored-by: Brian Johnson --- index.rst | 17 ++ intermediate_source/fx_profiling_tutorial.py | 236 +++++++++++++++++++ 2 files changed, 253 insertions(+) create mode 100644 intermediate_source/fx_profiling_tutorial.py diff --git a/index.rst b/index.rst index ad9bea0ca84..813eb6c5b4a 100644 --- a/index.rst +++ b/index.rst @@ -215,6 +215,15 @@ Welcome to PyTorch Tutorials :link: advanced/super_resolution_with_onnxruntime.html :tags: Production +.. Code Transformations with FX + +.. customcarditem:: + :header: Building a Simple Performance Profiler with FX + :card_description: Build a simple FX interpreter to record the runtime of op, module, and function calls and report statistics + :image: _static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png + :link: intermediate/fx_profiling_tutorial.html + :tags: FX + .. Frontend APIs .. customcarditem:: @@ -537,6 +546,14 @@ Additional Resources advanced/cpp_export advanced/super_resolution_with_onnxruntime +.. 
toctree:: + :maxdepth: 2 + :includehidden: + :hidden: + :caption: Code Transforms with FX + + intermediate/fx_profiling_tutorial + .. toctree:: :maxdepth: 2 :includehidden: diff --git a/intermediate_source/fx_profiling_tutorial.py b/intermediate_source/fx_profiling_tutorial.py new file mode 100644 index 00000000000..d54f3ccb61e --- /dev/null +++ b/intermediate_source/fx_profiling_tutorial.py @@ -0,0 +1,236 @@ +# -*- coding: utf-8 -*- +""" +(beta) Building a Simple CPU Performance Profiler with FX +******************************************************* +**Author**: `James Reed `_ + +In this tutorial, we are going to use FX to do the following: + +1) Capture PyTorch Python code in a way that we can inspect and gather + statistics about the structure and execution of the code +2) Build out a small class that will serve as a simple performance "profiler", + collecting runtime statistics about each part of the model from actual + runs. + +""" + +###################################################################### +# For this tutorial, we are going to use the torchvision ResNet18 model +# for demonstration purposes. + +import torch +import torch.fx +import torchvision.models as models + +rn18 = models.resnet18() +rn18.eval() + +###################################################################### +# Now that we have our model, we want to inspect deeper into its +# performance. That is, for the following invocation, which parts +# of the model are taking the longest? +input = torch.randn(5, 3, 224, 224) +output = rn18(input) + +###################################################################### +# A common way of answering that question is to go through the program +# source, add code that collects timestamps at various points in the +# program, and compare the difference between those timestamps to see +# how long the regions between the timestamps take. +# +# That technique is certainly applicable to PyTorch code, however it +# would be nicer if we didn't have to copy over model code and edit it, +# especially code we haven't written (like this torchvision model). +# Instead, we are going to use FX to automate this "instrumentation" +# process without needing to modify any source. + +###################################################################### +# First, let's get some imports out of the way (we will be using all +# of these later in the code). + +import statistics, tabulate, time +from typing import Any, Dict, List +from torch.fx import Interpreter + +###################################################################### +# .. note:: +# ``tabulate`` is an external library that is not a dependency of PyTorch. +# We will be using it to more easily visualize performance data. Please +# make sure you've installed it from your favorite Python package source. + +###################################################################### +# Capturing the Model with Symbolic Tracing +# ----------------------------------------- +# Next, we are going to use FX's symbolic tracing mechanism to capture +# the definition of our model in a data structure we can manipulate +# and examine. + +traced_rn18 = torch.fx.symbolic_trace(rn18) +print(traced_rn18.graph) + +###################################################################### +# This gives us a Graph representation of the ResNet18 model. A Graph +# consists of a series of Nodes connected to each other. 
Each Node +# represents a call-site in the Python code (whether to a function, +# a module, or a method) and the edges (represented as ``args`` and ``kwargs`` +# on each node) represent the values passed between these call-sites. More +# information about the Graph representation and the rest of FX's APIs ca +# be found at the FX documentation https://pytorch.org/docs/master/fx.html. + + +###################################################################### +# Creating a Profiling Interpreter +# -------------------------------- +# Next, we are going to create a class that inherits from ``torch.fx.Interpreter``. +# Though the ``GraphModule`` that ``symbolic_trace`` produces compiles Python code +# that is run when you call a ``GraphModule``, an alternative way to run a +# ``GraphModule`` is by executing each ``Node`` in the ``Graph`` one by one. That is +# the functionality that ``Interpreter`` provides: It interprets the graph node- +# by-node. +# +# By inheriting from ``Interpreter``, we can override various functionality and +# install the profiling behavior we want. The goal is to have an object to which +# we can pass a model, invoke the model 1 or more times, then get statistics about +# how long the model and each part of the model took during those runs. +# +# Let's define our ``ProfilingInterpreter`` class: + +class ProfilingInterpreter(Interpreter): + def __init__(self, mod : torch.nn.Module): + # Rather than have the user symbolically trace their model, + # we're going to do it in the constructor. As a result, the + # user can pass in any ``Module`` without having to worry about + # symbolic tracing APIs + gm = torch.fx.symbolic_trace(mod) + super().__init__(gm) + + # We are going to store away two things here: + # + # 1. A list of total runtimes for ``mod``. In other words, we are + # storing away the time ``mod(...)`` took each time this + # interpreter is called. + self.total_runtime_sec : List[float] = [] + # 2. A map from ``Node`` to a list of times (in seconds) that + # node took to run. This can be seen as similar to (1) but + # for specific sub-parts of the model. + self.runtimes_sec : Dict[torch.fx.Node, List[float]] = {} + + ###################################################################### + # Next, let's override our first method: ``run()``. ``Interpreter``'s ``run`` + # method is the top-level entrypoint for execution of the model. We will + # want to intercept this so that we can record the total runtime of the + # model. + + def run(self, *args) -> Any: + # Record the time we started running the model + t_start = time.time() + # Run the model by delegating back into Interpreter.run() + return_val = super().run(*args) + # Record the time we finished running the model + t_end = time.time() + # Store the total elapsed time this model execution took in the + # ProfilingInterpreter + self.total_runtime_sec.append(t_end - t_start) + return return_val + + ###################################################################### + # Now, let's override ``run_node``. ``Interpreter`` calls ``run_node`` each + # time it executes a single node. We will intercept this so that we + # can measure and record the time taken for each individual call in + # the model. 
+ + def run_node(self, n : torch.fx.Node) -> Any: + # Record the time we started running the op + t_start = time.time() + # Run the op by delegating back into Interpreter.run_node() + return_val = super().run_node(n) + # Record the time we finished running the op + t_end = time.time() + # If we don't have an entry for this node in our runtimes_sec + # data structure, add one with an empty list value. + self.runtimes_sec.setdefault(n, []) + # Record the total elapsed time for this single invocation + # in the runtimes_sec data structure + self.runtimes_sec[n].append(t_end - t_start) + return return_val + + ###################################################################### + # Finally, we are going to define a method (one which doesn't override + # any ``Interpreter`` method) that provides us a nice, organized view of + # the data we have collected. + + def summary(self, should_sort : bool = False) -> str: + # Build up a list of summary information for each node + node_summaries : List[List[Any]] = [] + # Calculate the mean runtime for the whole network. Because the + # network may have been called multiple times during profiling, + # we need to summarize the runtimes. We choose to use the + # arithmetic mean for this. + mean_total_runtime = statistics.mean(self.total_runtime_sec) + + # For each node, record summary statistics + for node, runtimes in self.runtimes_sec.items(): + # Similarly, compute the mean runtime for ``node`` + mean_runtime = statistics.mean(runtimes) + # For easier understanding, we also compute the percentage + # time each node took with respect to the whole network. + pct_total = mean_runtime / mean_total_runtime * 100 + # Record the node's type, name of the node, mean runtime, and + # percent runtim + node_summaries.append( + [node.op, str(node), mean_runtime, pct_total]) + + # One of the most important questions to answer when doing performance + # profiling is "Which op(s) took the longest?". We can make this easy + # to see by providing sorting functionality in our summary view + if should_sort: + node_summaries.sort(key=lambda s: s[2], reverse=True) + + # Use the ``tabulate`` library to create a well-formatted table + # presenting our summary information + headers : List[str] = [ + 'Op type', 'Op', 'Average runtime (s)', 'Pct total runtime' + ] + return tabulate.tabulate(node_summaries, headers=headers) + +###################################################################### +# .. note:: +# We use Python's ``time.time`` function to pull wall clock +# timestamps and compare them. This is not the most accurate +# way to measure performance, and will only give us a first- +# order approximation. We use this simple technique only for the +# purpose of demonstration in this tutorial. + +###################################################################### +# Investigating the Performance of ResNet18 +# ----------------------------------------- +# We can now use ``ProfilingInterpreter`` to inspect the performance +# characteristics of our ResNet18 model; + +interp = ProfilingInterpreter(rn18) +interp.run(input) +print(interp.summary(True)) + +###################################################################### +# There are two things we should call out here: +# +# * MaxPool2d takes up the most time. This is a known issue: +# https://github.com/pytorch/pytorch/issues/51393 +# * BatchNorm2d also takes up significant time. 
We can continue this +# line of thinking and optimize this in the Conv-BN Fusion with FX +# tutorial TODO: link +# +# +# Conclusion +# ---------- +# As we can see, using FX we can easily capture PyTorch programs (even +# ones we don't have the source code for!) in a machine-interpretable +# format and use that for analysis, such as the performance analysis +# we've done here. FX opens up an exiciting world of possibilities for +# working with PyTorch programs. +# +# Finally, since FX is still in beta, we would be happy to hear any +# feedback you have about using it. Please feel free to use the +# PyTorch Forums (https://discuss.pytorch.org/) and the issue tracker +# (https://github.com/pytorch/pytorch/issues) to provide any feedback +# you might have. From 5bda6b07d2fd2bc20062157c98f95ed0da8c418b Mon Sep 17 00:00:00 2001 From: Horace He Date: Thu, 4 Mar 2021 06:45:37 -0800 Subject: [PATCH 06/13] [FX] Added fuser tutorial (#1356) * Added fuser tutorial * updated index.rst * fixed conclusion * responded to some comments * responded to comments * respond Co-authored-by: Brian Johnson --- index.rst | 8 +- intermediate_source/fx_conv_bn_fuser.py | 262 ++++++++++++++++++++++++ 2 files changed, 269 insertions(+), 1 deletion(-) create mode 100644 intermediate_source/fx_conv_bn_fuser.py diff --git a/index.rst b/index.rst index 813eb6c5b4a..c2637e5ce9a 100644 --- a/index.rst +++ b/index.rst @@ -217,6 +217,13 @@ Welcome to PyTorch Tutorials .. Code Transformations with FX +.. customcarditem:: + :header: Building a Convolution/Batch Norm fuser in FX + :card_description: Build a simple FX pass that fuses batch norm into convolution to improve performance during inference. + :image: _static/img/thumbnails/cropped/Deploying-PyTorch-in-Python-via-a-REST-API-with-Flask.png + :link: intermediate/fx_conv_bn_fuser.html + :tags: FX + .. customcarditem:: :header: Building a Simple Performance Profiler with FX :card_description: Build a simple FX interpreter to record the runtime of op, module, and function calls and report statistics @@ -614,4 +621,3 @@ Additional Resources beginner/deeplabv3_on_ios beginner/deeplabv3_on_android - diff --git a/intermediate_source/fx_conv_bn_fuser.py b/intermediate_source/fx_conv_bn_fuser.py new file mode 100644 index 00000000000..93b89c08fec --- /dev/null +++ b/intermediate_source/fx_conv_bn_fuser.py @@ -0,0 +1,262 @@ +# -*- coding: utf-8 -*- +""" +(beta) Building a Convolution/Batch Norm fuser in FX +******************************************************* +**Author**: `Horace He `_ + +In this tutorial, we are going to use FX, a toolkit for composable function +transformations of PyTorch, to do the following: + +1) Find patterns of conv/batch norm in the data dependencies. +2) For the patterns found in 1), fold the batch norm statistics into the convolution weights. + +Note that this optimization only works for models in inference mode (i.e. `mode.eval()`) + +We will be building the fuser that exists here: +https://github.com/pytorch/pytorch/blob/orig/release/1.8/torch/fx/experimental/fuser.py + +""" + + +###################################################################### +# First, let's get some imports out of the way (we will be using all +# of these later in the code). + +from typing import Type, Dict, Any, Tuple, Iterable +import copy +import torch.fx as fx +import torch +import torch.nn as nn + +###################################################################### +# For this tutorial, we are going to create a model consisting of convolutions +# and batch norms. 
Note that this model has some tricky components - some of
+# the conv/batch norm patterns are hidden within Sequentials and one of the
+# BatchNorms is wrapped in another Module.
+
+class WrappedBatchNorm(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.mod = nn.BatchNorm2d(1)
+    def forward(self, x):
+        return self.mod(x)
+
+class M(nn.Module):
+    def __init__(self):
+        super().__init__()
+        self.conv1 = nn.Conv2d(1, 1, 1)
+        self.bn1 = nn.BatchNorm2d(1)
+        self.conv2 = nn.Conv2d(1, 1, 1)
+        self.nested = nn.Sequential(
+            nn.BatchNorm2d(1),
+            nn.Conv2d(1, 1, 1),
+        )
+        self.wrapped = WrappedBatchNorm()
+
+    def forward(self, x):
+        x = self.conv1(x)
+        x = self.bn1(x)
+        x = self.conv2(x)
+        x = self.nested(x)
+        x = self.wrapped(x)
+        return x
+
+model = M()
+
+model.eval()
+
+######################################################################
+# Fusing Convolution with Batch Norm
+# -----------------------------------------
+# One of the primary challenges with trying to automatically fuse convolution
+# and batch norm in PyTorch is that PyTorch does not provide an easy way of
+# accessing the computational graph. FX resolves this problem by symbolically
+# tracing the actual operations called, so that we can track the computations
+# through the `forward` call, nested within Sequential modules, or wrapped in
+# a user-defined module.
+
+traced_model = torch.fx.symbolic_trace(model)
+print(traced_model.graph)
+
+######################################################################
+# This gives us a graph representation of our model. Note that both the modules
+# hidden within the sequential as well as the wrapped Module have been inlined
+# into the graph. This is the default level of abstraction, but it can be
+# configured by the pass writer. More information can be found at the FX
+# overview https://pytorch.org/docs/master/fx.html#module-torch.fx
+
+
+####################################
+# Fusing Convolution with Batch Norm
+# ----------------------------------
+# Unlike some other fusions, fusion of convolution with batch norm does not
+# require any new operators. Instead, as batch norm during inference
+# consists of a pointwise add and multiply, these operations can be "baked"
+# into the preceding convolution's weights. This allows us to remove the batch
+# norm entirely from our model! Read
+# https://nenadmarkus.com/p/fusing-batchnorm-and-conv/ for further details. The
+# code here is copied from
+# https://github.com/pytorch/pytorch/blob/orig/release/1.8/torch/nn/utils/fusion.py
+# for clarity purposes.
+def fuse_conv_bn_eval(conv, bn):
+    """
+    Given a conv Module `A` and a batch_norm module `B`, returns a conv
+    module `C` such that C(x) == B(A(x)) in inference mode.
+    """
+    assert(not (conv.training or bn.training)), "Fusion only for eval!"
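+
+    # fuse_conv_bn_weights (defined below) folds the batch norm into the conv:
+    # with running statistics mu / var, affine parameters gamma / beta and the
+    # numerical-stability term eps, the fused convolution uses
+    #
+    #   W_fused = W * gamma / sqrt(var + eps)
+    #   b_fused = (b - mu) * gamma / sqrt(var + eps) + beta
+    #
+    # which is why C(x) == B(A(x)) at inference time.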
+ fused_conv = copy.deepcopy(conv) + + fused_conv.weight, fused_conv.bias = \ + fuse_conv_bn_weights(fused_conv.weight, fused_conv.bias, + bn.running_mean, bn.running_var, bn.eps, bn.weight, bn.bias) + + return fused_conv + +def fuse_conv_bn_weights(conv_w, conv_b, bn_rm, bn_rv, bn_eps, bn_w, bn_b): + if conv_b is None: + conv_b = torch.zeros_like(bn_rm) + if bn_w is None: + bn_w = torch.ones_like(bn_rm) + if bn_b is None: + bn_b = torch.zeros_like(bn_rm) + bn_var_rsqrt = torch.rsqrt(bn_rv + bn_eps) + + conv_w = conv_w * (bn_w * bn_var_rsqrt).reshape([-1] + [1] * (len(conv_w.shape) - 1)) + conv_b = (conv_b - bn_rm) * bn_var_rsqrt * bn_w + bn_b + + return torch.nn.Parameter(conv_w), torch.nn.Parameter(conv_b) + + +#################################### +# FX Fusion Pass +# ---------------------------------- +# Now that we have our computational graph as well as a method for fusing +# convolution and batch norm, all that remains is to iterate over the FX graph +# and apply the desired fusions. + + +def _parent_name(target : str) -> Tuple[str, str]: + """ + Splits a qualname into parent path and last atom. + For example, `foo.bar.baz` -> (`foo.bar`, `baz`) + """ + *parent, name = target.rsplit('.', 1) + return parent[0] if parent else '', name + +def replace_node_module(node: fx.Node, modules: Dict[str, Any], new_module: torch.nn.Module): + assert(isinstance(node.target, str)) + parent_name, name = _parent_name(node.target) + setattr(modules[parent_name], name, new_module) + + +def fuse(model: torch.nn.Module) -> torch.nn.Module: + model = copy.deepcopy(model) + # The first step of most FX passes is to symbolically trace our model to + # obtain a `GraphModule`. This is a representation of our original model + # that is functionally identical to our original model, except that we now + # also have a graph representation of our forward pass. + fx_model: fx.GraphModule = fx.symbolic_trace(model) + modules = dict(fx_model.named_modules()) + + # The primary representation for working with FX are the `Graph` and the + # `Node`. Each `GraphModule` has a `Graph` associated with it - this + # `Graph` is also what generates `GraphModule.code`. + # The `Graph` itself is represented as a list of `Node` objects. Thus, to + # iterate through all of the operations in our graph, we iterate over each + # `Node` in our `Graph`. + for node in fx_model.graph.nodes: + # The FX IR contains several types of nodes, which generally represent + # call sites to modules, functions, or methods. The type of node is + # determined by `Node.op`. + if node.op != 'call_module': # If our current node isn't calling a Module then we can ignore it. + continue + # For call sites, `Node.target` represents the module/function/method + # that's being called. Here, we check `Node.target` to see if it's a + # batch norm module, and then check `Node.args[0].target` to see if the + # input `Node` is a convolution. + if type(modules[node.target]) is nn.BatchNorm2d and type(modules[node.args[0].target]) is nn.Conv2d: + if len(node.args[0].users) > 1: # Output of conv is used by other nodes + continue + conv = modules[node.args[0].target] + bn = modules[node.target] + fused_conv = fuse_conv_bn_eval(conv, bn) + replace_node_module(node.args[0], modules, fused_conv) + # As we've folded the batch nor into the conv, we need to replace all uses + # of the batch norm with the conv. + node.replace_all_uses_with(node.args[0]) + # Now that all uses of the batch norm have been replaced, we can + # safely remove the batch norm. 
+ fx_model.graph.erase_node(node) + fx_model.graph.lint() + # After we've modified our graph, we need to recompile our graph in order + # to keep the generated code in sync. + fx_model.recompile() + return fx_model + + +###################################################################### +# .. note:: +# We make some simplifications here for demonstration purposes, such as only +# matching 2D convolutions. View +# https://github.com/pytorch/pytorch/blob/master/torch/fx/experimental/fuser.py +# for a more usable pass. + +###################################################################### +# Testing out our Fusion Pass +# ----------------------------------------- +# We can now run this fusion pass on our initial toy model and verify that our +# results are identical. In addition, we can print out the code for our fused +# model and verify that there are no more batch norms. + + +fused_model = fuse(model) +print(fused_model.code) +inp = torch.randn(5, 1, 1, 1) +torch.testing.assert_allclose(fused_model(inp), model(inp)) + + +###################################################################### +# Benchmarking our Fusion on ResNet18 +# ---------- +# We can test our fusion pass on a larger model like ResNet18 and see how much +# this pass improves inference performance. +import torchvision.models as models +import time + +rn18 = models.resnet18() +rn18.eval() + +inp = torch.randn(10, 3, 224, 224) +output = rn18(inp) + +def benchmark(model, iters=20): + for _ in range(10): + model(inp) + begin = time.time() + for _ in range(iters): + model(inp) + return str(time.time()-begin) + +fused_rn18 = fuse(rn18) +print("Unfused time: ", benchmark(rn18)) +print("Fused time: ", benchmark(fused_rn18)) +###################################################################### +# As we previously saw, the output of our FX transformation is +# (Torchscriptable) PyTorch code, we can easily `jit.script` the output to try +# and increase our performance even more. In this way, our FX model +# transformation composes with Torchscript with no issues. +jit_rn18 = torch.jit.script(fused_rn18) +print("jit time: ", benchmark(jit_rn18)) + + +############ +# Conclusion +# ---------- +# As we can see, using FX we can easily write static graph transformations on +# PyTorch code. +# +# Since FX is still in beta, we would be happy to hear any +# feedback you have about using it. Please feel free to use the +# PyTorch Forums (https://discuss.pytorch.org/) and the issue tracker +# (https://github.com/pytorch/pytorch/issues) to provide any feedback +# you might have. 
\ No newline at end of file From 07b6340348a1b00a568795c0ceec37b1e18bba42 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 4 Mar 2021 10:27:00 -0500 Subject: [PATCH 07/13] Update numeric_suite_tutorial.py --- prototype_source/numeric_suite_tutorial.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 61b7c670fd8..fee8308eb95 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -50,7 +50,7 @@ float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, img_data, inplace=False) +qmodel = quantize(float_model, default_eval_fn, (img_data,), inplace=False) ############################################################################## # 1. Compare the weights of float and quantized models From 49362b16ae3ba883cd9edbc0adef4e52701d90e2 Mon Sep 17 00:00:00 2001 From: Pritam Damania <9958665+pritamdamania87@users.noreply.github.com> Date: Thu, 4 Mar 2021 09:03:26 -0800 Subject: [PATCH 08/13] Tutorial combining DDP with Pipeline Parallelism to Train Transformer models (#1347) * Tutorial combining DDP with Pipeline Parallelism to Train Transformer models. Summary: Tutorial which places a pipe on GPUs 0 and 1 and another Pipe on GPUs 2 and 3. Both pipe replicas are replicated via DDP. One process drives GPUs 0 and 1 and another drives GPUs 2 and 3. * Polish out some of the docs. * Add thumbnail and address some comments. Co-authored-by: pritam --- ...Data-Parallel-and-Pipeline-Parallelism.png | Bin 0 -> 35776 bytes advanced_source/ddp_pipeline_tutorial.py | 464 ++++++++++++++++++ index.rst | 8 + 3 files changed, 472 insertions(+) create mode 100644 _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png create mode 100644 advanced_source/ddp_pipeline_tutorial.py diff --git a/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png b/_static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png new file mode 100644 index 0000000000000000000000000000000000000000..426a14d98f5f7fbbf695626658ad1946fe4ef63e GIT binary patch literal 35776 zcmeEtbcnTI)&B)>Oj7rou)-Lc&u~R(Owu^vdwR2NV4z(ry2B4GD=JNk!qU zZeYP_7sh9r#ZbtDAAG_UiA(Gz6Q1<_OD+;+>G=~*zEZu#3PU>WIv-4D-){6C*cXst=D#k!;j7i3u!AhP{V)= zWaN#4(zMfeQ^1d2G$Vv^m<%z5v0Ep)*e`E6k|fmZ|3V6V%9dFFH<0|l!2j~`KREpF zH2hZ`{#OkCR~-M>8~#6NjSFI|Ezj?_6M|`8ninipY=Ma{ovmwGs~5zaOvE5pOI}KY z@~s^sP$mSts7EXpABu_1I(=nKr%j>=3!mO7er#x*j;~kza3eNESMo9D=Pm>Oo9hvN z=b|(onfMub<`}{U4l4@lll|(YrXXVTpc~Rbt7Lh6Nz_J%>31YUEPozl@qb>VqArNV zto$Ym#N0-rm3ad3mwk*O^wW)gy~0!%Lug$b8Rc}fKWX^<_)!Yp@5A!jJOrUAbu}ST z6?&{PkuOQ`e4_S@_`qrYEU>?EI-Wss;r#w}m+PqO7CJhASat`x1 z@Tbayc-Vtw$~TUy_n|w%WP_35E>ch?=L}oGg~t))L5s zs22%Q&i&9DY5FyXE+;RA)}ckhR5#4PA5C74NgJ>c-Qw|xWOxUTUlm1m^MDDydC3)B zre-9-()+;H*U!)D_}{O?(^K^?d15*bGEww(VCEVYd~c#u249o~ZFzC*%{b6|7+1Fg5wk)-R6mMA6@wo}(QHWjf#f7j2Y zg466!?-2RW_e13Y+`}FWSi!-i*8cvSzp*CQn%Gzk;mu? 
[remainder of the base85-encoded binary data for _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png omitted]
zd1+X+9A@lpG!t=d3zF8|k)Cx248By4|3guEpq*>$&vLDUS-xAGcahYR2_=lrdIs+C z$(Z6odz%)+?0%JPsgR4e5smc8A1b@4cB{qqQ0%&Yl3%2f_n8UHJn2y`o<+}CJury^ znO!(eZCT!GpEKj?=eQQeW;mj?sgm)hHw+z3xZKaOa z6)8Zrq8a<0B+BRit~9X$t=IZgo4xUs4CHxht=gBm#!g#w?G3_VCjzGO6Jc9f6=l-}>OJ3srEVS|YU2q_WI_Fs=9j`!;Jp6$FbslMtWcC@HNYYoG@i{?!sDok6;vmi1(`U(x|$!Yg570P zL*h~JKQOzP8W(gvN#Giz+D0LsdlH{CS32YX-6&=WhuCZ|`wMz*CwD9=@B)RejDZY# zEV6CyOWWT(x%}xLP^Tzkq(&bMavc(7y?`#92^_yQ5w(x`8k)P+PKJ_Wj~2C=rS*VG zm_k!S`P0U$d-}<@292s;Tc5dWWQk!YtD1;}1_CpwImv9ZGh&;~Zm|!=KMQNf;w9iC zc}EmUT*)tg2XB^+4Hs+IyY*UDqDDuwI@>cY!mY!a?!85^KEAg7vTiJ=v1~4EeL!Ev z#|b_DI;n?RbWKR_VomEky*V(@tZzalc7ARN%&p3}s%NeS&CQeA(r3;CuM~rQ`tmvB zQx6JP9pQ7%^{q+zd^QoSbXo#ouyHFPxst;zz5LXYNU!ZXW+2KEit3R95j!4>{QzVj zPAPA|;QfSM!`LoxY8zbhF?4k1wM`AJf6FfvM?4MvGXrtSGEHjML!rw9bua^lRNDV8 zM*6G}@%ZGqB{km)oa%=tH@sZm137u!Mc|cA9pebPY-;I*f01g(z_hBJ#hMCmJnvV| zF)`NB83e> z8nm40GlZT0SlHA5A!x6WGJa3hli31-*FTO+LbIH@se8~BEYMWRa9~U9@hc8EDFJ(1dO@XMADM>yB?ZQ32fCix*@aReQ|nD} zis6{}Q^T1^qi`AT9CzHQBVZ4JY`#>q5t1pM24Ytgi}VuJ^=CJd;6hc3!EoEt4&)cE zs5*V!PkjZf2(OxylK7Ft0)Q&NixH>aB;bTfK|r*0#$4=aV0mdeFE;*6v*kku>by!~ z-mB?Mi#M4EHFry~%vi@gGn#32ij~-M$(7{UF>s-I&9VGWF%Ig*e*kCotc3sj1Su~C zc3M2%d2>onaVcj-Ip!H0FY{5ey<=VLKz{x`pUbiA9f#ho&P&|a4>QJX?FDod=)#_# zhnkZum_wJ@vooOh)|hBsVSN$(9}{SD=!+J|p-b3U0YPbOb=Ni~4ge;=JLH1_FKU7j zl;l)h1W};!5mU=)TEot`+Odg|@0;>qN6-)dX&?^~ll60@pW`8C2kv{+r;GXBi_DUe zh6@7Pj3u42Vk_K8za0Ux7$38*_1;^A`34ku_LK*>;D^Fx);vw<{RKF*vZRq3BjPtx zi@i`0g5vDGXi1SoD$*Kfy=YO+Xd39MaQZ>~+{;Nz0}-fpOuP{O5#;eEtjySpe}Exv z^OLF>tz!{AGKpV8qp!5yQP8ZonGI8L#$JLvU1A>}PL5wQSES%QQ!}FaUb4z>syW0$|{%N)-3i2_vUM#7Mkat|NP;lPiYblW+}Ju-Q+yJ zk`9{f>PF!VKf*L<8jQ26>eoKK`PiV5@IAD6D|C!A{+|L1aC=pvq8z&JPXs=H;oc^9*;lz(2B z6k4>HgAx3o*i~q)*F45tbd{`h(uDQdQhtPWBdNiSe%6Qc$IS*LljN7hGVclTDdV;S zu^kpKKQduZ*23(y0f;tz>3h6=_D59RVvd1833~8GR!&#%`rfWY&4UU#-h#dC1FYge zQkrsjzJ@+*2T+{l6J4c`;t5=HP}G_)cfvx(LFL@Lzfz&BUD`2ocY|Dvn3JfL2T<|E z`m!fLdDoJ%Y4cAM0yci(_wHZBO@8!on0mYQW)~B_;Ff8uHj+TkuRAV^u_?b+d*_wD zascfPzl=Kmlilt^=$`wZx1+2Ne$0sp4G~!VdtNO}k86H&TOULoQ2jzs2V>h8#y0F@ z&I(N**gPA|Pgz0y>RH{-{MYGx+$LxW+`}656-oUiw);~tBb#-UzpRWqLlW-Zc-rE! 
zyz{miLy1wkW>9Vp-3`1V8rOm|YQPIy<}G4$8{6))&I)=pK|gK?O(P$WCoveN8Npe=T4cg=-I>^njq%CtQHx|m!N#Tx(YDVYy4ij0;Uy(KWDuz)iYr7=#18cAmT7RU@>Fb_6|_ z$FKNxW(eXgG3nDP#R~yzBu24&$#v&hTpw4~q&y8Ecl~PF$JyT|r>3#F3sMdcg2*-k zyddTITZvGBWqmrbLo>kN>|cG+Or?iaA&e>i09v#DD9d}8*s6mfFG*JpXeR7jXATI|cV zLCEa#B1LqTYArH1U~AZ1gz(YAZlZ(mTgb5D-FmuTv|!^uxnsj2nbS|To3*;lA3Z_0 zlzqUYtyEGZeVK%WWWV;vl8hd6ieD!%XN~UczOyFE!t5FTTtE3zuxA%J7#-TSKgh0GIcY zo+@eLyl9v<6ly}Zi}~B=6(i}>V}#qvEHVn02+%Q{( zNf{M8m;eunWkcb|ufvl{2xTWDS4$%vJ?jz9rvA<`kf@ANXWr$QtCbk|PNNNu$9U(% zW}2vmY3qS*il67MYm5B;iL&6ewNp!$1ky=qkt$Xh@gp>`z^Jk-YRMA-WL*_pK{0iO zBE^R08$8m`WWJcdQn!IKFQN6wZ3`p9;8a;M(yHK~yt zgVoOSfPS6=`{B8_V2`wfUmkB)#gyb(U-&WSWQVr>-HTxI*GP*Rp1uIJT=uW)l_WJh zcwz!@c;-}vlssFHW_rM{{0aMbhI?Sx3}MP9vaO9z$U5(gvY%1apfVv4jVtgb)HAN1 zW7L#?fy78Ygh#fS{~DG}TH*h!gZo+8iC3FF8f?C7M0fR-?+irf2X{RHH#{<@N6m2?B<2;*^zjPLUrVHW?3mw$nDQ+#z+aksJ&)** z<4a2$5m2Ajleha-f~tB@T=!9c{b4OdG{(tp;<;WK(D|JP+;shv1Ihz;NHWIVdBjwN zfv5UuGXr|W*{Xc^NE*veaQxekJM|B#qgM1 zY8X%*llu7aIM=|+YkMdkThv-JeDHI7={MyhF8A`dvmQJNK4A)VT&#!eUS8vKTMw6q zoS(z&%st~|elNc@lJupKQR?WZ1=N;i4u>Qn?Pjd40&B&=gd4+-k4j=9LYbK~(nS!r zn(1qtCQE?jlesg2Gyjs6aRgrW5O$UV|LNxMIZvwGWOLtZFKI&|Eal?j%8&AIeGTt- z7V`gVu|M30EJPP-WTFhqsV}=2w^S~Aa zz4=B*z;SxEJ9{`sYDRocqo*IFykJ=txF1bhHmB6kq4}X_t2o(8GTG{Iz4=lawZ48z zk|esCkjM{&GM}~ETy?3e3}t$rSEHlPcYZFhwtOwMV1xeLid&*;g9t=|9O4wZK%j4E z*jUW4al!l;KANCc3cH#-ZZrRL?*u-!7$te8ANjqC*J+R_9DMKSMcXiv&{$bi@u(ja zYyjPsN)%_BQXZw;?%za~Z?J3=d;DSd?vK9E@!-0)6B9)DVe%d@z-47?p=60Ltb5bf z7>C+%ZYkfBk!Ca{H(a=~O;CM$NW`;6DfvBP(_nX1Hr9K;gsugnsz;=@mEND{w|dIK zHd&a8DmJP4KC|`N_xMU_5Mp;|v^|7d|1!fnn(SHk{4O8GU_Ld*vPEC*sP=at#y$qJ zc3#-m+C3h+j*?})wdm+>niN#aglfruv+3;Z@YQ5!Ekf@iwt0I%w;VL%vtB>TO7_n1 zI{!axcN2TG>EANvcZkg-4Xpk*EUVQj?^E=~lo#7gvTyRO64C|XtjO_ z^qU5a@dYx)E@Lw}jDoD4-SV1p{aE-IN~UEVezU^&At-mFWjK}HxQ&#RJ`33%3%Eh{ zZ|}p(J?2nTM0i#0uu58+@nPr9fW073+}vjJg&pILs()?_y^?MC}Z^ zKjLfQJXh!1>nU3XkS|#U($11v?+!~=V)hxy?$ci*jcZivi2}?-eKJCwWlzlHM={|^ zorCC^B)!~WUw`k8I}@j8Zq!ZP7Y1~TcijkL4-KMZhMwQ}=UUoX^RtLfXr0Jbx8NLO zZBlCJcf-PNSg(|fjLP7>f~;OEc(K2xhh)A@q8BnZ^>>+>$$0KXb}NE3KwhzjZgcAe z3Aa79g* zc0nU^f7MlOkt|bFRXf2m3V2%0Z)xT~)+Mn$DDwUCyh@md>?t!j{mb_&tAWK%Vv_VN zUq)lhXZqWA=ZR*dettvv`hzR5h23Jmu}EnHDbXV3@VWY9ZBGg!qtb@!I?Yvr(h zHn5e~SQ(OhWjfN#3fX2_nEXlu*vP*Dbwl*?yxX3gN1+N<2YzkGC;hs6D$VEaweNV8 zMn=Yb{ji&??FcFh@nmue-IXEDzIZR)=wlE~!v+Q8#llGigM z-(5pHo}(-CD>Ru{FBW1Gr6^=pU&WI9e85S}*GAOPRiTwUh?H1q<=8vhF5f1x?%n@x z%!100Ms^uR!)0X@H?l~`G+^MFU0|^&Xv%&z(E(c|TQXF&3omMW>$bMhuw6FcP>?l$ zX}SZXQ34|}ev5u1yU%=$>am@V(a}n~Zi;hhQ#%P$k;h=}EgJ(kD-<-U*I5@ZS-UeK z@N`JxWAav%l|)5BfikJdZE>a_;Vp#3teo-!2KLawiA8O|8L7!jn06V7=npgq{I}H> zmeY^n{zdgMn~aJq`a0<@M*Ay!Bn8IgOk-(INUVLwg=UB#$Hl+JoA_y6txcB0HBu`! 
zO;omGg!_0O*CvVN=C*LyJtYn!)Wdz(QtV7yN}h<=*2VEzJl+-e2sj)P_ZU-T*d(!c z$LD<9&@G>IJpE(hRkSnr_9$8(ylW-z);4$e{*Q#NAWg#dc*^V)5t5d|No7XrE*mb^ zQe{NrA)T9>Oe;Xw?<+uTtn`IC%4@Uzb4p7;JzU>@#CG+qq~##1MDuM!R?p5;GUf-@ z`7N1G$2g4wM~vgZ=7CrbgFgoxN!bi8bW zoo8VV4Wz^m{x3~nuZMH(JKG!^xP*`o*<+e?=U~@QIZ6(|8kOE{K`Fpx=khEm(UV55 zXG6tIWh|Cz!-gE=y0Qaan!rsEaEFS$|Fq{#Yc}HVe61`Tcp%_LlfpI2^ZOM+t}6}f zh9=+Y*4oE10SHo$BMH)V`$%D|#{-0nO-y@FK%O05kD1g3}7$O87M@8N$83gef7!3_oOG&o_ESn{CTq!9aphiRs2 zvjvt^T;4K*@2i?E!IcRzRFHPy7}bs%DFfp&4rXUR7P8iprR*jsFirWqj@x4R5bVi5w++At_vwy;sy!D3h}c`9C68$iDyp literal 0 HcmV?d00001 diff --git a/advanced_source/ddp_pipeline_tutorial.py b/advanced_source/ddp_pipeline_tutorial.py new file mode 100644 index 00000000000..9a9fd909436 --- /dev/null +++ b/advanced_source/ddp_pipeline_tutorial.py @@ -0,0 +1,464 @@ +""" +Training Transformer models using Distributed Data Parallel and Pipeline Parallelism +==================================================================================== + +**Author**: `Pritam Damania `_ + +This tutorial demonstrates how to train a large Transformer model across +multiple GPUs using `Distributed Data Parallel `__ and +`Pipeline Parallelism `__. This tutorial is an extension of the +`Sequence-to-Sequence Modeling with nn.Transformer and TorchText `__ tutorial +and scales up the same model to demonstrate how Distributed Data Parallel and +Pipeline Parallelism can be used to train Transformer models. + +Prerequisites: + + * `Pipeline Parallelism `__ + * `Sequence-to-Sequence Modeling with nn.Transformer and TorchText `__ + * `Getting Started with Distributed Data Parallel `__ +""" + + +###################################################################### +# Define the model +# ---------------- +# + +###################################################################### +# ``PositionalEncoding`` module injects some information about the +# relative or absolute position of the tokens in the sequence. The +# positional encodings have the same dimension as the embeddings so that +# the two can be summed. Here, we use ``sine`` and ``cosine`` functions of +# different frequencies. + +import sys +import os +import math +import torch +import torch.nn as nn +import torch.nn.functional as F +import tempfile +from torch.nn import TransformerEncoder, TransformerEncoderLayer + +class PositionalEncoding(nn.Module): + + def __init__(self, d_model, dropout=0.1, max_len=5000): + super(PositionalEncoding, self).__init__() + self.dropout = nn.Dropout(p=dropout) + + pe = torch.zeros(max_len, d_model) + position = torch.arange(0, max_len, dtype=torch.float).unsqueeze(1) + div_term = torch.exp(torch.arange(0, d_model, 2).float() * (-math.log(10000.0) / d_model)) + pe[:, 0::2] = torch.sin(position * div_term) + pe[:, 1::2] = torch.cos(position * div_term) + pe = pe.unsqueeze(0).transpose(0, 1) + self.register_buffer('pe', pe) + + def forward(self, x): + x = x + self.pe[:x.size(0), :] + return self.dropout(x) + + +###################################################################### +# In this tutorial, we will split a Transformer model across two GPUs and use +# pipeline parallelism to train the model. In addition to this, we use +# `Distributed Data Parallel `__ +# to train two replicas of this pipeline. We have one process driving a pipe across +# GPUs 0 and 1 and another process driving a pipe across GPUs 2 and 3. Both these +# processes then use Distributed Data Parallel to train the two replicas. 
The +# model is exactly the same model used in the `Sequence-to-Sequence Modeling with nn.Transformer and TorchText +# `__ tutorial, +# but is split into two stages. The largest number of parameters belong to the +# `nn.TransformerEncoder `__ layer. +# The `nn.TransformerEncoder `__ +# itself consists of ``nlayers`` of `nn.TransformerEncoderLayer `__. +# As a result, our focus is on ``nn.TransformerEncoder`` and we split the model +# such that half of the ``nn.TransformerEncoderLayer`` are in ``TransformerModelStage1`` +# and the other half are in ``TransformerModelStage2``. + +if sys.platform == 'win32': + print('Windows platform is not supported for pipeline parallelism') + sys.exit(0) +if torch.cuda.device_count() < 4: + print('Need at least four GPU devices for this tutorial') + sys.exit(0) + +class TransformerModelStage1(nn.Module): + + def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): + super(TransformerModelStage1, self).__init__() + self.src_mask = None + self.pos_encoder = PositionalEncoding(ninp, dropout) + encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) + self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) + self.encoder = nn.Embedding(ntoken, ninp) + self.ninp = ninp + + self.init_weights() + + def _generate_square_subsequent_mask(self, sz): + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def init_weights(self): + initrange = 0.1 + self.encoder.weight.data.uniform_(-initrange, initrange) + + def forward(self, src): + if self.src_mask is None or self.src_mask.size(0) != src.size(0): + device = src.device + mask = self._generate_square_subsequent_mask(src.size(0)).to(device) + self.src_mask = mask + + src = self.encoder(src) * math.sqrt(self.ninp) + src = self.pos_encoder(src) + output = self.transformer_encoder(src, self.src_mask) + return output + +class TransformerModelStage2(nn.Module): + + def __init__(self, ntoken, ninp, nhead, nhid, nlayers, dropout=0.5): + super(TransformerModelStage2, self).__init__() + self.src_mask = None + encoder_layers = TransformerEncoderLayer(ninp, nhead, nhid, dropout) + self.transformer_encoder = TransformerEncoder(encoder_layers, nlayers) + self.decoder = nn.Linear(ninp, ntoken) + + self.init_weights() + + def _generate_square_subsequent_mask(self, sz): + mask = (torch.triu(torch.ones(sz, sz)) == 1).transpose(0, 1) + mask = mask.float().masked_fill(mask == 0, float('-inf')).masked_fill(mask == 1, float(0.0)) + return mask + + def init_weights(self): + initrange = 0.1 + self.decoder.bias.data.zero_() + self.decoder.weight.data.uniform_(-initrange, initrange) + + def forward(self, src): + if self.src_mask is None or self.src_mask.size(0) != src.size(0): + device = src.device + mask = self._generate_square_subsequent_mask(src.size(0)).to(device) + self.src_mask = mask + + output = self.transformer_encoder(src, self.src_mask) + output = self.decoder(output) + return output + +###################################################################### +# Start multiple processes for training +# ------------------------------------- +# + + +###################################################################### +# We start two processes where each process drives its own pipeline across two +# GPUs. ``run_worker`` is executed for each process. 
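+
+######################################################################
+# For example, with ``world_size = 2`` (see the ``mp.spawn`` call at the end of
+# this file), rank 0 builds its pipe on GPUs ``2 * rank`` and ``2 * rank + 1``,
+# i.e. GPUs 0 and 1, while rank 1 uses GPUs 2 and 3; DistributedDataParallel
+# then keeps the two replicas in sync by averaging their gradients.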
+ +def run_worker(rank, world_size): + + +###################################################################### +# Load and batch data +# ------------------- +# + + +###################################################################### +# The training process uses Wikitext-2 dataset from ``torchtext``. The +# vocab object is built based on the train dataset and is used to numericalize +# tokens into tensors. Starting from sequential data, the ``batchify()`` +# function arranges the dataset into columns, trimming off any tokens remaining +# after the data has been divided into batches of size ``batch_size``. +# For instance, with the alphabet as the sequence (total length of 26) +# and a batch size of 4, we would divide the alphabet into 4 sequences of +# length 6: +# +# .. math:: +# \begin{bmatrix} +# \text{A} & \text{B} & \text{C} & \ldots & \text{X} & \text{Y} & \text{Z} +# \end{bmatrix} +# \Rightarrow +# \begin{bmatrix} +# \begin{bmatrix}\text{A} \\ \text{B} \\ \text{C} \\ \text{D} \\ \text{E} \\ \text{F}\end{bmatrix} & +# \begin{bmatrix}\text{G} \\ \text{H} \\ \text{I} \\ \text{J} \\ \text{K} \\ \text{L}\end{bmatrix} & +# \begin{bmatrix}\text{M} \\ \text{N} \\ \text{O} \\ \text{P} \\ \text{Q} \\ \text{R}\end{bmatrix} & +# \begin{bmatrix}\text{S} \\ \text{T} \\ \text{U} \\ \text{V} \\ \text{W} \\ \text{X}\end{bmatrix} +# \end{bmatrix} +# +# These columns are treated as independent by the model, which means that +# the dependence of ``G`` and ``F`` can not be learned, but allows more +# efficient batch processing. +# + +# In 'run_worker' + def print_with_rank(msg): + print('[RANK {}]: {}'.format(rank, msg)) + + import io + from torchtext.utils import download_from_url, extract_archive + from torchtext.data.utils import get_tokenizer + from torchtext.vocab import build_vocab_from_iterator + + url = 'https://s3.amazonaws.com/research.metamind.io/wikitext/wikitext-2-v1.zip' + test_filepath, valid_filepath, train_filepath = extract_archive(download_from_url(url, root=".data{}".format(rank))) + tokenizer = get_tokenizer('basic_english') + vocab = build_vocab_from_iterator(map(tokenizer, + iter(io.open(train_filepath, + encoding="utf8")))) + + def data_process(raw_text_iter): + data = [torch.tensor([vocab[token] for token in tokenizer(item)], + dtype=torch.long) for item in raw_text_iter] + return torch.cat(tuple(filter(lambda t: t.numel() > 0, data))) + + train_data = data_process(iter(io.open(train_filepath, encoding="utf8"))) + val_data = data_process(iter(io.open(valid_filepath, encoding="utf8"))) + test_data = data_process(iter(io.open(test_filepath, encoding="utf8"))) + device = torch.device(2 * rank) + + def batchify(data, bsz, rank, world_size, is_train=False): + # Divide the dataset into bsz parts. + nbatch = data.size(0) // bsz + # Trim off any extra elements that wouldn't cleanly fit (remainders). + data = data.narrow(0, 0, nbatch * bsz) + # Evenly divide the data across the bsz batches. + data = data.view(bsz, -1).t().contiguous() + # Divide the data across the ranks only for training data. 
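+        # For example, if the batched training data has 600 rows and
+        # world_size is 2, rank 0 keeps rows 0-299 and rank 1 keeps rows
+        # 300-599, so each DDP replica trains on its own shard of the data.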
+ if is_train: + data_per_rank = data.size(0) // world_size + data = data[rank * data_per_rank : (rank + 1) * data_per_rank] + return data.to(device) + + batch_size = 20 + eval_batch_size = 10 + train_data = batchify(train_data, batch_size, rank, world_size, True) + val_data = batchify(val_data, eval_batch_size, rank, world_size) + test_data = batchify(test_data, eval_batch_size, rank, world_size) + + +###################################################################### +# Functions to generate input and target sequence +# ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +# + + +###################################################################### +# ``get_batch()`` function generates the input and target sequence for +# the transformer model. It subdivides the source data into chunks of +# length ``bptt``. For the language modeling task, the model needs the +# following words as ``Target``. For example, with a ``bptt`` value of 2, +# we’d get the following two Variables for ``i`` = 0: +# +# .. image:: ../_static/img/transformer_input_target.png +# +# It should be noted that the chunks are along dimension 0, consistent +# with the ``S`` dimension in the Transformer model. The batch dimension +# ``N`` is along dimension 1. +# + +# In 'run_worker' + bptt = 35 + def get_batch(source, i): + seq_len = min(bptt, len(source) - 1 - i) + data = source[i:i+seq_len] + target = source[i+1:i+1+seq_len].view(-1) + return data, target + +###################################################################### +# Model scale and Pipe initialization +# ----------------------------------- +# + + +###################################################################### +# To demonstrate training large Transformer models using pipeline parallelism, +# we scale up the Transformer layers appropriately. We use an embedding +# dimension of 4096, hidden size of 4096, 16 attention heads and 8 total +# transformer layers (``nn.TransformerEncoderLayer``). This creates a model with +# **~1 billion** parameters. +# +# We need to initialize the `RPC Framework `__ +# since Pipe depends on the RPC framework via `RRef `__ +# which allows for future expansion to cross host pipelining. We need to +# initialize the RPC framework with only a single worker since we're using a +# single process to drive multiple GPUs. +# +# The pipeline is then initialized with 8 transformer layers on one GPU and 8 +# transformer layers on the other GPU. One pipe is setup across GPUs 0 and 1 and +# another across GPUs 2 and 3. Both pipes are then replicated using DistributedDataParallel. + +# In 'run_worker' + ntokens = len(vocab.stoi) # the size of vocabulary + emsize = 4096 # embedding dimension + nhid = 4096 # the dimension of the feedforward network model in nn.TransformerEncoder + nlayers = 8 # the number of nn.TransformerEncoderLayer in nn.TransformerEncoder + nhead = 16 # the number of heads in the multiheadattention models + dropout = 0.2 # the dropout value + + from torch.distributed import rpc + tmpfile = tempfile.NamedTemporaryFile() + rpc.init_rpc( + name="worker", + rank=0, + world_size=1, + rpc_backend_options=rpc.TensorPipeRpcBackendOptions( + init_method="file://{}".format(tmpfile.name), + ) + ) + + # Need to use 'checkpoint=never' since as of PyTorch 1.8, Pipe checkpointing + # doesn't work with DDP. 
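+    # The two stages below are placed on GPUs 2 * rank and 2 * rank + 1, and
+    # ``chunks=8`` splits each mini-batch into 8 micro-batches that are
+    # pipelined through the two stages so that both GPUs stay busy.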
+ from torch.distributed.pipeline.sync import Pipe + model = Pipe( + torch.nn.Sequential( + TransformerModelStage1(ntokens, emsize, nhead, nhid, int(nlayers/2), dropout).cuda(2 * rank), + TransformerModelStage2(ntokens, emsize, nhead, nhid, int(nlayers/2), dropout).cuda(2 * rank + 1), + ), + chunks = 8, + checkpoint = "never" + ) + + # Initialize process group and wrap model in DDP. + from torch.nn.parallel import DistributedDataParallel + import torch.distributed as dist + os.environ['MASTER_ADDR'] = 'localhost' + os.environ['MASTER_PORT'] = '29500' + dist.init_process_group( + backend="nccl", rank=rank, world_size=world_size) + model = DistributedDataParallel(model) + + def get_total_params(module: torch.nn.Module): + total_params = 0 + for param in module.parameters(): + total_params += param.numel() + return total_params + + print_with_rank('Total parameters in model: {:,}'.format(get_total_params(model))) + +###################################################################### +# Run the model +# ------------- +# + + +###################################################################### +# `CrossEntropyLoss `__ +# is applied to track the loss and +# `SGD `__ +# implements stochastic gradient descent method as the optimizer. The initial +# learning rate is set to 5.0. `StepLR `__ is +# applied to adjust the learn rate through epochs. During the +# training, we use +# `nn.utils.clip_grad_norm\_ `__ +# function to scale all the gradient together to prevent exploding. +# + +# In 'run_worker' + criterion = nn.CrossEntropyLoss() + lr = 5.0 # learning rate + optimizer = torch.optim.SGD(model.parameters(), lr=lr) + scheduler = torch.optim.lr_scheduler.StepLR(optimizer, 1.0, gamma=0.95) + + import time + def train(): + model.train() # Turn on the train mode + total_loss = 0. + start_time = time.time() + ntokens = len(vocab.stoi) + + # Train only for 50 batches to keep script execution time low. + nbatches = min(50 * bptt, train_data.size(0) - 1) + + for batch, i in enumerate(range(0, nbatches, bptt)): + data, targets = get_batch(train_data, i) + optimizer.zero_grad() + # Since the Pipe is only within a single host and process the ``RRef`` + # returned by forward method is local to this node and can simply + # retrieved via ``RRef.local_value()``. + output = model(data).local_value() + # Need to move targets to the device where the output of the + # pipeline resides. + loss = criterion(output.view(-1, ntokens), targets.cuda(2 * rank + 1)) + loss.backward() + torch.nn.utils.clip_grad_norm_(model.parameters(), 0.5) + optimizer.step() + + total_loss += loss.item() + log_interval = 10 + if batch % log_interval == 0 and batch > 0: + cur_loss = total_loss / log_interval + elapsed = time.time() - start_time + print_with_rank('| epoch {:3d} | {:5d}/{:5d} batches | ' + 'lr {:02.2f} | ms/batch {:5.2f} | ' + 'loss {:5.2f} | ppl {:8.2f}'.format( + epoch, batch, nbatches // bptt, scheduler.get_lr()[0], + elapsed * 1000 / log_interval, + cur_loss, math.exp(cur_loss))) + total_loss = 0 + start_time = time.time() + + def evaluate(eval_model, data_source): + eval_model.eval() # Turn on the evaluation mode + total_loss = 0. + ntokens = len(vocab.stoi) + # Evaluate only for 50 batches to keep script execution time low. 
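+        # As in train(), the pipeline returns an ``RRef``, so the output is
+        # fetched with ``.local_value()`` and the targets are moved to the GPU
+        # of the last stage (``2 * rank + 1``) before computing the loss.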
+ nbatches = min(50 * bptt, data_source.size(0) - 1) + with torch.no_grad(): + for i in range(0, nbatches, bptt): + data, targets = get_batch(data_source, i) + output = eval_model(data).local_value() + output_flat = output.view(-1, ntokens) + # Need to move targets to the device where the output of the + # pipeline resides. + total_loss += len(data) * criterion(output_flat, targets.cuda(2 * rank + 1)).item() + return total_loss / (len(data_source) - 1) + +###################################################################### +# Loop over epochs. Save the model if the validation loss is the best +# we've seen so far. Adjust the learning rate after each epoch. + +# In 'run_worker' + best_val_loss = float("inf") + epochs = 3 # The number of epochs + best_model = None + + for epoch in range(1, epochs + 1): + epoch_start_time = time.time() + train() + val_loss = evaluate(model, val_data) + print_with_rank('-' * 89) + print_with_rank('| end of epoch {:3d} | time: {:5.2f}s | valid loss {:5.2f} | ' + 'valid ppl {:8.2f}'.format(epoch, (time.time() - epoch_start_time), + val_loss, math.exp(val_loss))) + print_with_rank('-' * 89) + + if val_loss < best_val_loss: + best_val_loss = val_loss + best_model = model + + scheduler.step() + + +###################################################################### +# Evaluate the model with the test dataset +# ------------------------------------- +# +# Apply the best model to check the result with the test dataset. + +# In 'run_worker' + test_loss = evaluate(best_model, test_data) + print_with_rank('=' * 89) + print_with_rank('| End of training | test loss {:5.2f} | test ppl {:8.2f}'.format( + test_loss, math.exp(test_loss))) + print_with_rank('=' * 89) + +# Main execution +import torch.multiprocessing as mp + +if __name__=="__main__": + world_size = 2 + mp.spawn(run_worker, args=(world_size, ), nprocs=world_size, join=True) + diff --git a/index.rst b/index.rst index dec8bc64a77..9dbad7ad71b 100644 --- a/index.rst +++ b/index.rst @@ -406,6 +406,13 @@ Welcome to PyTorch Tutorials :link: advanced/rpc_ddp_tutorial.html :tags: Parallel-and-Distributed-Training +.. customcarditem:: + :header: Training Transformer models using Distributed Data Parallel and Pipeline Parallelism + :card_description: Walk through a through a simple example of how to train a transformer model using Distributed Data Parallel and Pipeline Parallelism + :image: _static/img/thumbnails/cropped/Training-Transformer-Models-using-Distributed-Data-Parallel-and-Pipeline-Parallelism.png + :link: advanced/ddp_pipeline_tutorial.html + :tags: Parallel-and-Distributed-Training + .. Mobile .. customcarditem:: @@ -613,6 +620,7 @@ Additional Resources intermediate/dist_pipeline_parallel_tutorial intermediate/rpc_async_execution advanced/rpc_ddp_tutorial + advanced/ddp_pipeline_tutorial .. 
toctree:: :maxdepth: 2 From f931a062645b2d6440db6aafea6f81332c6d5164 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 09:56:00 -0800 Subject: [PATCH 09/13] More updates to numeric_suite --- prototype_source/numeric_suite_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index fee8308eb95..d900b8f3593 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -124,13 +124,13 @@ def compute_error(x, y): print("\nkeys of act_compare_dict entry for conv1's output:") print(act_compare_dict['conv1.stats'].keys()) -print(act_compare_dict['conv1.stats']['float'].shape) -print(act_compare_dict['conv1.stats']['quantized'].shape) +print(act_compare_dict['conv1.stats']['float'][0].shape) +print(act_compare_dict['conv1.stats']['quantized'][0].shape) ############################################################################## # This dict can be used to compare and compute the quantization error of the activations of float and quantized models as following. for key in act_compare_dict: - print(key, compute_error(act_compare_dict[key]['float'], act_compare_dict[key]['quantized'].dequantize())) + print(key, compute_error(act_compare_dict[key]['float'][0], act_compare_dict[key]['quantized'][0].dequantize())) ############################################################################## # If we want to do the comparison for more than one input data, we can do the following. From 4877298f5255902df2f4e57b62193ddfd578df00 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 10:41:41 -0800 Subject: [PATCH 10/13] Even more updates --- prototype_source/numeric_suite_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index d900b8f3593..24a26a3dfa4 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -50,7 +50,7 @@ float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, (img_data,), inplace=False) +qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False) ############################################################################## # 1. Compare the weights of float and quantized models @@ -206,7 +206,7 @@ def forward(self, x): float_model.fuse_model() float_model.qconfig = torch.quantization.default_qconfig img_data = [(torch.rand(2, 3, 10, 10, dtype=torch.float), torch.randint(0, 1, (2,), dtype=torch.long)) for _ in range(2)] -qmodel = quantize(float_model, default_eval_fn, img_data, inplace=False) +qmodel = quantize(float_model, default_eval_fn, [img_data], inplace=False) ############################################################################## # In the following example we call ``compare_model_stub()`` from PyTorch Numeric Suite to compare ``QuantizableBasicBlock`` module with its float point equivalent. This API returns a dict with key corresponding to module names and each entry being a dictionary with two keys 'float' and 'quantized', containing the output tensors of quantized and its matching float shadow module. 
From 352c6a5e17fdbed7c79d48bc372579cf4bc7a1d4 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 11:24:08 -0800 Subject: [PATCH 11/13] Update numeric_suite_tutorial.py Hopefully that's the last one --- prototype_source/numeric_suite_tutorial.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 24a26a3dfa4..3575e44ce09 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -224,14 +224,14 @@ def forward(self, x): print("\nkeys of ob_dict entry for layer1.0's output:") print(ob_dict['layer1.0.stats'].keys()) -print(ob_dict['layer1.0.stats']['float'].shape) -print(ob_dict['layer1.0.stats']['quantized'].shape) +print(ob_dict['layer1.0.stats']['float'][0].shape) +print(ob_dict['layer1.0.stats']['quantized'][0].shape) ############################################################################## # This dict can be then used to compare and compute the module level quantization error. for key in ob_dict: - print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'].dequantize())) + print(key, compute_error(ob_dict[key]['float'][0], ob_dict[key]['quantized'][0].dequantize())) ############################################################################## # If we want to do the comparison for more than one input data, we can do the following. From c609fd517e4156e8c4c093d6253522fec82e6909 Mon Sep 17 00:00:00 2001 From: Nikita Shulga Date: Thu, 4 Mar 2021 12:45:52 -0800 Subject: [PATCH 12/13] Update numeric_suite_tutorial.py Last one --- prototype_source/numeric_suite_tutorial.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/prototype_source/numeric_suite_tutorial.py b/prototype_source/numeric_suite_tutorial.py index 3575e44ce09..df386f4efd2 100644 --- a/prototype_source/numeric_suite_tutorial.py +++ b/prototype_source/numeric_suite_tutorial.py @@ -370,7 +370,7 @@ def init_hidden(self, bsz): for key in act_compare_dict: - print(key, compute_error(act_compare_dict[key]['float'][0], act_compare_dict[key]['quantized'][0])) + print(key, compute_error(act_compare_dict[key]['float'][0][0], act_compare_dict[key]['quantized'][0][0])) ############################################################################## # @@ -405,7 +405,7 @@ def init_hidden(self, bsz): # This dict can be then used to compare and compute the module level quantization error. for key in ob_dict: - print(key, compute_error(ob_dict[key]['float'], ob_dict[key]['quantized'])) + print(key, compute_error(ob_dict[key]['float'][0], ob_dict[key]['quantized'][0])) ############################################################################## # SQNR of 40 dB is high and this is a situation where we have very good numerical alignment between the floating point and quantized model. From aca434dd1f7f2ea1badb64cab77cf3650643e598 Mon Sep 17 00:00:00 2001 From: Brian Johnson Date: Thu, 4 Mar 2021 15:50:16 -0500 Subject: [PATCH 13/13] Update build.sh --- .jenkins/build.sh | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/.jenkins/build.sh b/.jenkins/build.sh index b1220e89b90..90499be51f6 100755 --- a/.jenkins/build.sh +++ b/.jenkins/build.sh @@ -25,8 +25,8 @@ pip install -r $DIR/../requirements.txt #Install PyTorch Nightly for test. 
# Nightly - pip install --pre torch torchvision torchaudio -f https://download.pytorch.org/whl/nightly/cu102/torch_nightly.html # RC Link -pip uninstall -y torch torchvision torchaudio torchtext -pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext +# pip uninstall -y torch torchvision torchaudio torchtext +# pip install -f https://download.pytorch.org/whl/test/cu102/torch_test.html torch torchvision torchaudio torchtext # For Tensorboard. Until 1.14 moves to the release channel. pip install tb-nightly