{ "cells": [ { "cell_type": "markdown", "metadata": { "id": "view-in-github", "colab_type": "text" }, "source": [ "\"Open" ] }, { "cell_type": "markdown", "metadata": { "id": "iDWxyBFpRaP-" }, "source": [ "# Transformers for Text Analytics\n", "\n", "In this lab, we will work on a Transformer model for text analytics. We will be using [Huggingface's Transformers library](https://huggingface.co/docs/transformers/index) which comes with many models that will be of use to use." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "_OliER25ECVG", "scrolled": true }, "outputs": [], "source": [ "# Install necessasary packages, if not done before\n", "!pip install transformers evaluate accelerate\n", "!pip install torchview\n", "!pip install livelossplot\n", "!pip install --upgrade pandas" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "qJm4jZARRavt", "scrolled": true }, "outputs": [], "source": [ "# Imports\n", "import numpy as np\n", "import os\n", "import pandas as pd\n", "\n", "# Scikit-learn\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.utils import class_weight\n", "from sklearn.preprocessing import OneHotEncoder, StandardScaler\n", "from sklearn.metrics import roc_auc_score, confusion_matrix, roc_curve, auc\n", "\n", "# Plots\n", "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "from IPython.display import Image\n", "from torchview import draw_graph\n", "import graphviz\n", "from livelossplot import PlotLosses\n", "graphviz.set_jupyter_format('png')\n", "%matplotlib inline\n", "\n", "# Import Pytorch lybraries\n", "import torch\n", "import torch.nn as nn\n", "import torch.optim as optim\n", "from torch.autograd import Variable\n", "from torch.utils.data import TensorDataset, DataLoader, random_split\n", "from torch.optim.lr_scheduler import _LRScheduler\n", "\n", "# Huggingface\n", "from transformers import AutoTokenizer, DataCollatorWithPadding\n", "from transformers import AutoModelForSequenceClassification, TrainingArguments, Trainer\n", "from transformers import pipeline\n", "from datasets import load_dataset, Dataset, Value, ClassLabel, Features, load_from_disk\n", "import evaluate" ] }, { "cell_type": "markdown", "metadata": { "id": "yv6ME605E7Dg" }, "source": [ "## Preprocessing the data\n", "\n", "For this example we will use data from investor calls and the company's rating the next quarter. Can we predict the rating of a company by checking their investor call a quarter ago?" 
] }, { "cell_type": "code", "execution_count": null, "metadata": { "collapsed": true, "id": "5MQLA_fBbRxh", "jupyter": { "outputs_hidden": true }, "scrolled": true }, "outputs": [], "source": [ "# Get the data and unzip it.\n", "!gdown 'https://drive.google.com/uc?id=12O5Tebqw_ulwUY5IwkdRuo8LZhM90yB3'\n", "\n", "# Extract the files.\n", "!unzip InvestorCallData.zip" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "2GPXA6BcRl2c" }, "outputs": [], "source": [ "# Read data.\n", "investor_df = pd.read_csv('InvestorCallData.csv', index_col=0)\n", "investor_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "vc1AkZ5FGI4u" }, "outputs": [], "source": [ "# Clean the text.\n", "investor_df['text'] = investor_df['text'].apply(lambda x: \" \".join(x.split()))" ] }, { "cell_type": "code", "source": [ "# Clean the ratings\n", "def clean_rating(rating):\n", " if rating in np.arange(1,6):\n", " return \"RatingAAA-A+\"\n", " elif rating==6:\n", " return \"RatingA\"\n", " elif rating==7:\n", " return \"RatingA-\"\n", " elif rating==8:\n", " return \"RatingBBB+\"\n", " elif rating==9:\n", " return \"RatingBBB\"\n", " elif rating in np.arange(10,13):\n", " return \"RatingBBBM-BB\"\n", " elif rating in np.arange(13,16):\n", " return \"RatingBBM-B\"\n", " elif rating in np.arange(16,23):\n", " return \"RatingBM-C\"\n", " else:\n", " return \"D\"\n", "\n", "# Apply\n", "investor_df['RATING'] = investor_df['RATING'].apply(clean_rating)\n", "\n", "# Result\n", "investor_df.head()" ], "metadata": { "id": "AkILu0kdeogI" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Save to CSV\n", "investor_df.to_csv('InvestorCallDataProcessed.csv', index=False)" ], "metadata": { "id": "xyi-3WA7HD-t" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "ebm4VgwCGI4u" }, "source": [ "The second step is to create a dataset in [Huggingface's format](https://huggingface.co/docs/datasets/create_dataset). We will also generate a train and test set to make a fair comparison between our models. From here onwards we will use the [train_test_split](https://huggingface.co/docs/datasets/process#split) function from Huggingface. This works on any Huggingface dataset in general.\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "fiz3dF20GI4v" }, "outputs": [], "source": [ "# Create the dataset\n", "investor_data = Dataset.from_pandas(investor_df.loc[:,['text', 'RATING']])\n", "\n", "# Set the label variable\n", "investor_data = investor_data.class_encode_column(\"RATING\")\n", "investor_data.rename_column(\"RATING\", \"label\")\n", "\n", "# Drop the index variable\n", "investor_data = investor_data.remove_columns([\"__index_level_0__\"])\n", "\n", "# Train / test split\n", "investor_data = investor_data.train_test_split(0.33)\n", "investor_data" ] }, { "cell_type": "code", "source": [ "investor_data['train'].features" ], "metadata": { "id": "vnPQPH12bd_M" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "metadata": { "id": "A54aEFsfGI4v" }, "source": [ "Now we will tokenize the text, which means setting it up in a format that the model can understand. For this, we will use a [tokenizer](https://huggingface.co/docs/transformers/preprocessing), or a function that turns the text into the format that we need. 
The following code does that for [Distilbert](https://huggingface.co/distilbert/distilbert-base-uncased), a smaller and faster version of BERT that is trained to have similar performance." ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "S3CDwHxnR6ts" }, "outputs": [], "source": [ "# Load the DistilBERT tokenizer.\n", "tokenizer = AutoTokenizer.from_pretrained(\"distilbert-base-uncased\")" ] }, { "cell_type": "code", "execution_count": null, "metadata": { "id": "IGnr5k4ZGI4w" }, "outputs": [], "source": [ "# Tokenize and truncate to the model's maximum length. Our texts are very long!\n", "def preprocess_function(examples):\n", "    return tokenizer(examples[\"text\"], truncation=True)" ] }, { "cell_type": "code", "source": [ "# Tokenize the full dataset (takes around 5 minutes).\n", "tokenized_investor_data = investor_data.map(preprocess_function, batched=True)" ], "metadata": { "id": "nVZNddBKLWfT" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Save the outcome to disk so we do not have to run this again.\n", "tokenized_investor_data.save_to_disk(\"TokenizedData\")" ], "metadata": { "id": "r5iXMw0QYJWV" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Zip to download\n", "# !zip -r TokenizedData.zip TokenizedData" ], "metadata": { "id": "xJzR1Uqbke5A" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Load tokenized data if needed\n", "# !gdown https://drive.google.com/uc?id=10JX6SLYAZi4Xu7UBbdDSJd7b5Xals_6y\n", "# !unzip TokenizedData.zip\n", "# tokenized_investor_data = load_from_disk('TokenizedData')" ], "metadata": { "id": "ALc7f7x8jZWv" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Finally, we'll create a data collator that will pad the sequences in each batch on the fly, if necessary.\n" ], "metadata": { "id": "kiqfXjY0MzNm" } }, { "cell_type": "code", "source": [ "data_collator = DataCollatorWithPadding(tokenizer=tokenizer)" ], "metadata": { "id": "gD-rh2YPM6ij" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Training a model\n", "\n", "Now we can start training the model. We will download the model from Huggingface and instantiate it." ], "metadata": { "id": "fD79qUTvMC-r" } }, { "cell_type": "code", "source": [ "# How many classes there are.\n", "num_labels = len(investor_df[\"RATING\"].unique())\n", "print(f'There are {num_labels} classes in the dataset.')" ], "metadata": { "id": "sP6Qt4aUNjG0" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "model = AutoModelForSequenceClassification.from_pretrained(\n", "    \"distilbert-base-uncased\", num_labels=num_labels\n", ")" ], "metadata": { "id": "3T7RndtmMtEE" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "You can ignore the warning. It simply says that the classification head is newly initialized, because we have not fine-tuned the model yet. Let's do that now."
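], "metadata": {} }, { "cell_type": "markdown", "metadata": {}, "source": [ "First, a quick optional look at what we just instantiated (a sketch; it prints the parameter count and the untrained classification head the warning refers to):" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Inspect the model: a pretrained DistilBERT body plus a freshly initialized classification head.\n", "print(f'Trainable parameters: {model.num_parameters():,}')\n", "print(model.classifier)" ] }, { "cell_type": "markdown", "source": [ "To evaluate the model during training we will use simple accuracy. After that, we define the training parameters. In a real-life application, you would tune these with a grid search, possibly in a distributed setting, but here we will use some standard values."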
], "metadata": { "id": "NtR0L_h1OD9z" } }, { "cell_type": "code", "source": [ "accuracy = evaluate.load(\"accuracy\")\n", "\n", "def compute_metrics(eval_pred):\n", " predictions, labels = eval_pred\n", " predictions = np.argmax(predictions, axis=1)\n", " return accuracy.compute(predictions=predictions, references=labels)" ], "metadata": { "id": "DVlogqLpPYp4" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "training_args = TrainingArguments(\n", " # Where to store the model.\n", " output_dir=\"ModelOutput\",\n", " # Learning rate to use.\n", " learning_rate=1e-6,\n", " # Batch size to use per GPU in training.\n", " per_device_train_batch_size=16,\n", " # Batch size to use per GPU in evaluation\n", " per_device_eval_batch_size=16,\n", " # Epochs to train\n", " num_train_epochs=2,\n", " # If decaying or not the weights\n", " weight_decay=5e-3,\n", " # When to evaluate the model\n", " evaluation_strategy=\"epoch\",\n", " # When to save checkpoint\n", " save_strategy=\"epoch\",\n", " # Load best after training?\n", " load_best_model_at_end=True,\n", " # Save in Huggingface? (Account required)\n", " push_to_hub=False,\n", " # How often to log training\n", " logging_steps=100,\n", ")" ], "metadata": { "id": "4ogee4A8Odw7" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Create trainer object.\n", "trainer = Trainer(\n", " # What model to use.\n", " model=model,\n", " # Arguments to the model\n", " args=training_args,\n", " # Training data\n", " train_dataset=tokenized_investor_data[\"train\"],\n", " # Test dataset\n", " eval_dataset=tokenized_investor_data[\"test\"],\n", " # How to tokenize\n", " tokenizer=tokenizer,\n", " # How to pad sequences\n", " data_collator=data_collator,\n", " # Error function\n", " compute_metrics=compute_metrics,\n", ")" ], "metadata": { "id": "a6deag6fPFpZ" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "trainer.train()" ], "metadata": { "id": "gzYpyPhzP23-" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "As we can see, the model has not trained much. This is obvious as we only gave it a few minutes to train. If left for a longer period it will start learning more.\n", "\n", "Assuming the model is ok, we now have a fine-tuned model that we can use for whatever we want. Let's save it so we can use it later." ], "metadata": { "id": "l2RLzXBCnaC1" } }, { "cell_type": "code", "source": [ "# Save the model to a folder\n", "trainer.save_model('FTModel')" ], "metadata": { "id": "KqiQxQ77nciA" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Zip it\n", "!zip -r DistilBert.zip FTModel" ], "metadata": { "id": "RtNDHh2HohdN" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "## Inference\n", "\n", "Let's apply the model to a test set." ], "metadata": { "id": "nS9CMWz8m08H" } }, { "cell_type": "code", "source": [ "# Download and unzip the model\n", "# !unzip DistilBert.zip\n", "# model = AutoModelForSeq2SeqLM.from_pretrained('FTmodel')" ], "metadata": { "id": "woJEt_ymop0C" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Huggingface is able to apply all inference steps in a unique function, as long as it is given a model. 
"We will do it manually, though, to see what is happening behind the scenes. Let's start by creating an example." ], "metadata": { "id": "e1CRU8dCpTF3" } }, { "cell_type": "code", "source": [ "text = \"Eli Lilly and Co (NYSE:LLY) All right, thanks, Joe. 2023 was a year of advancement across our company. We grew our top-line. We progressed our pipeline. It advanced our external innovation agenda through partnerships and collaborations. We continue to invest in quality, the reliability and the resilience of our company's manufacturing infrastructure, and most importantly, delivered new life-saving and life-changing medicines to more patients. In 2023, revenue grew 20% for the full year and 28% for the most recent quarter, as our newly launched portfolio continued to gain momentum. This past year, we announced positive Phase IIIs for Donanemab, Tirzepatide, Mirikizumab And Pertibrutinib. We also announced a positive Phase II result for Orforglipron as well as Retatrutide and moved these two important molecules into Phase III. In terms of external innovation, in 2023, we continued to complement our pipeline through acquisitions and collaborations. These transactions included the acquisition of DICE Therapeutics, POINT Biopharma, Versanis Bio, Emergence Therapeutics, Mablink Biosciences, Immunotrac as well as Sigilon Therapeutics. We have several significant investments in manufacturing, including plans to expand capacity at the company's Research Triangle Park facility and the two manufacturing sites within the LEAP Innovation Park in Boone County, Indiana. Most recently, we announced plans to construct a new high-tech manufacturing site in Germany. This facility will further expand the company's global injectable product and device manufacturing network, including for our diabetes and obesity portfolio. Most importantly, this past year, we brought innovative new medicines to patients. In 2023, we received regulatory approvals for Zepbound, Jaypirca, Omvoh, in the U.S. -- in the EU rather, and an expanded label for Verzenio and two new indications for Jardiance. This progress will serve as a foundation to drive top-tier revenue growth and margin expansion over time. As you can see on Slide 4, we continue to make progress against our strategic deliverables in Q4. Revenue grew 28% with our new products growing by over $2 billion. Since our last earnings call, we achieved several key pipeline milestones in addition to the Zepbound and Jaypirca CLL approvals, today, we announced top line results for the Tirzepatide Phase II SYNERGY-NASH trial as well as the Verzenio Phase III CYCLONE two trial. Dan will talk more about this update -- in his update. In terms of business development, in Q4, we completed the acquisitions of Mablink Bioscience and POINT Biopharma, the latter of which expands our capacity and capability into Radioligand therapies. Lastly, we announced a 15% dividend increase for the sixth consecutive year and distributed over $1 billion in dividends in the fourth quarter. Thanks, Dave. Slide six summarizes financial performance in the fourth quarter of 2023, and I'll focus my comments on non-GAAP performance. We are pleased with the strong financial performance in the fourth quarter and for the full year. Our performance was highlighted by continued acceleration of revenue growth, driven by our new products and growth products. 
Q4 revenue increased 28% compared to Q4 2022. Excluding divestiture, this represents a quarter-over-quarter acceleration revenue growth driven by Mounjaro, Verzenio, Jardiance and the recent launch of Zepbound. For the full year, revenue increased 20% driven by robust volume growth of 16%. Gross margin as a percent of revenue increased to 82.3%. Gross margin in the quarter benefited from higher realized prices, partially offset by higher manufacturing expenses. Marketing, selling and administrative expenses increased 17%, primarily driven by higher expenses associated with launches of new products and additional indications as well as higher incentive compensation costs. R&D expenses increased 28%, primarily driven by higher development expenses for late-stage assets and additional investments in early-stage research as well as higher incentive compensation costs. In Q4, we recognized acquired IPR&D charges of $623 million, which negatively impacted EPS by $0.62. In Q4 2022, acquired IPR&D charges totaled $240 million or $0.23 negative impact to EPS. Operating income increased 29% in Q4, driven by higher revenue from new launches, partially offset by operating expense growth. Operating income as a percent of revenue was approximately 28% for the quarter and included a negative impact of approximately seven percentage points attributable to acquired IPR&D charges. Our Q4 effective tax rate was 13.1% compared to 7.3% in Q4 2022. The higher effective tax rate for Q4 2023 was primarily driven by a lower net discrete tax benefit compared to the same period in 2022 and the new Puerto Rico tax regime. At the bottom line, we delivered earnings per share of $2.49 in Q4, a 19% increase compared to Q4 2022, inclusive of the negative impact of $0.62 from acquired IPR&D charges compared to $0.23 in Q4 2022. On Slide 8, we quantify the effect of price, rate and volume on revenue growth. U.S. revenue increased 39% in Q4, driven by robust growth of Mounjaro, Verzenio and Zepbound. Net price in the U.S. increased 27% for the quarter, driven by Mounjaro access and savings cards dynamic as well as the onetime favorable change in estimates for rebates and discounts. Excluding Mounjaro, net price in the U.S. decreased by high single digits. Europe continued its trend of strong growth in Q4. Excluding $65 million in revenue associated with milestones received for the EU approval and launch of Revenue was up 11% in constant currency, driven primarily by volume growth of Verzenio, Jardiance and Taltz. For Japan, we are pleased to see robust growth in Q4 as revenue increased 15% in constant currency, driven primarily by volume growth of Verzenio and Mounjaro. Moving to China, Q4 revenue increased 7% in constant currency with volume growth of 10% partially offset by price declines. Volume growth in Q4 was primarily driven by Pivot. We are pleased to see China return to growth in 2023. Revenue in the rest of the world decreased 10% in constant currency. However, when you exclude the impact of the Q4 2022 sales of rights for Alimta Korea and Taiwan, sales grew 9% in constant currency, driven primarily by volume growth of Mounjaro and Verzenio. Slide nine shows the contribution to worldwide volume growth by product category. As you can see, the new products and growth product categories combined contributed approximately 15 percentage points of volume growth for the quarter. Slide 10 provides additional perspective across our product categories. 
First, I would like to highlight Verzenio, which saw worldwide sales growth of 42% in Q4 driven by robust demand growth and, to a lesser extent, higher realized prices. The continued positive momentum is driven by early breast cancer indication with steady performance in the metastatic indication. Jardiance continued its strong 2023 performance with worldwide revenue growth of 30% for the quarter. In the U.S., Jardiance revenue increased 29%, driven by increased demand. In Q4, worldwide Trulicity revenue declined 14%. U.S. revenue decreased 18% driven by lower volume and lower realized prices. We experienced intermittent delays for filling orders of Trulicity. Starting in early December and going through January, all dose strengths of Trulicity were indicated as having limited availability on the FDA drug shortage site. We expect to experience intermittent delays orders of certain doses in the coming months. In international markets, Trulicity volume continued to be affected by measures we have taken to minimize potential disruption to existing patients, including communications to health care professionals not to start new patients on Trulicity. Moving to Slide 11, Mounjaro continued its robust growth as more Type 2 diabetes patients benefited from the medicine. Q4 revenue grew to over $2.2 billion globally, up from $1.4 billion in Q3 2023. In the U.S., Mounjaro revenue of $2.1 billion in Q4, up from $1.3 billion in Q3 2023, benefited from a onetime change in SMS for rebates and discounts. Adjusted for this onetime change, sequential net sales in the U.S. would have grown approximately 30% in Q4. Since our last call, we further expanded patient access to Mounjaro. As of February 1, access for patients with Type 2 diabetes in the U.S. was 90% in aggregate across commercial and Part D, including 92% access for commercial patients. This expanded access puts Mounjaro near parity with established injectable Incretins and gives more patients the opportunity to start therapy on Mounjaro for Type 2 diabetes. Since the $25 noncovered co-pay part program expired on June 30, we now consider all prescriptions paid. Compared to Q4 2022, the Mounjaro net price in Q4 2023 benefited from this change to the co-pay part program in the U.S. Recall that after a change to the noncovered co-pay program in late 2022, patients already started on the $25 co-pay card could remain in the program until June 30. Today, commercially insured patients without coverage utilize the current noncovered co-pay program and pay roughly half the list price for Mounjaro prescription. Turning to Slide 12. In November, we received FDA approval for Zepbound for adults with obesity or those who are overweight and have weight-related comorbidities. We then announced on December five that Zepbound available at U.S. pharmacies, and we started building commercial formulary access before the end of the year. We are pleased with the early access of approximately one-third of commercial lives covered as of February 1. Access in this market will be more gradual as individual employers need to opt in to coverage after the typical formulary contracting takes place. We are focused on building formulary access and employer opt-ins, but we expect that it will take some time before we reach broad open access in this market. Meanwhile, the commercial savings for our program is available at U.S. pharmacies for those who do not yet have coverage. In Medicare Part D, weight loss drugs are still prohibited from reimbursement. 
In Q4, we recognized $176 million in sales for Zepbound with approximately 3/4 of that coming from initial channel stocking. The initial prescription trends we have seen are encouraging. On Slide 13, we provide an update on capital allocation. Looking forward to 2024 and beyond, we have confidence in our existing commercial portfolio, bolstered by the recent launches of Mounjaro, Jaypirca, Omvoh, and Zepbound and the potential launches of the Donanemab and Lebrikizumab, all of which we expect to serve as drivers for contained growth through the balance of the decade. On Slide 14, you'll see a summary of our outlook; outline our capital deployment decisions in relation to achievement of our strategic deliverables. We will invest in our current portfolio and in the future innovation through R&D, business development and a comprehensive manufacturing expansion agenda designed to drive revenue growth and speed life-changing medicines to patients. We will continue to return capital to our shareholders through dividend increases in line with earnings growth over time and share repurchases with excess capital. Moving to Slide 15, we highlight some of the dynamics that may impact our 2024 financial results. We expect continued robust revenue growth with revenue from our core business which excludes revenue from divestiture growing nearly 30% at the midpoint of our guidance range, driven by positive momentum to simply launch products. In Incretins, anticipated growth will be led by Mounjaro and Zepbound. In 2023, we made tremendous strides in expanding access from Mounjaro, and we entered 2024 with 90% of commercial and Part D lives covered. Zepbound coverage is off to a good start in its early December launch, and we expect both Tirzepatide to contribute substantially to Lilly's revenue growth in 2024. While we expect Mounjaro and Zepbound to be drivers of revenue growth, this will be partially offset by an expected continuation of the softer Trulicity sales trends that we saw in the second half of 2023. Recent revenue declines for Trulicity in the U.S. has been driven by supply tightness. Volume has also been impacted by our actions outside the U.S. As for supply outlook for Emberton, our manufacturer organization continues to execute well on the most ambitious expansion agenda in our company's long history. Given strong demand and time required to bring capacity fully online, we continue to expect demand to outpace supply in 2024. In late 2022, we showed our expectation that by year-end 2023, our capacity for Incretin auto-injector pens would double. This goal was achieved through significant efforts from our manufacturing colleagues and partners around the globe. In 2024, our capacity execution efforts will continue with equal urgency and will be accomplished not just through increased auto-injector capacity but also through alternative presentation like our multi-use Quick Pen, which received regulatory approval in the U.K. in late January. We expect our manufacturing site in Concord, North Carolina, will initiate production as early as the end of 2024, with product available to ship in 2025. And we are pursuing a host of the projects, internal and external, large and small, to further expand capacity. Now I'll provide a bit more context on the timing and pace of our Incretin supply plans in 2024. While we're continuing to expand supply every quarter, we expect the most significant production increases to come in the second half of the year. 
We expect our production of sellable doses in the second half of 2024 will be at least one and a half times the production in the second half of 2023. Note that while last year, our commentary focused on capacity of auto-injectors devices compared to 2022, we're now referring sellable doses produced, which is more relevant to patients and investors. Beyond Incretin, we look forward to progressing our launch project for two other medicine approved and launched in 2023, Jaypirca and Omvoh. Jaypirca was initially approved by the FDA in January 2023 for adult patients with relapsed or refractory lymphoma under the accelerated approval program, received FDA approval also on the Accelerated Approval Program in December 2023 for adult patients with CLL or SLL that have received at least two prior lines of therapy. We look forward to the ongoing opportunity to help patients with this medicine as our best Phase III program continues. Omvoh was approved in October 2023 in the U.S. and earlier that year in Japan, Europe and other markets and represents a compelling new options for patients struggling with moderate to severe ulcerative colitis. And in 2024, we look forward to potential U.S. launches of two more medicines, Donanemab and Lebrikizumab. We continue to expect FDA regulatory actions on Donanemab in Q4 2024 and remain confident in the substantial potential for Donanemab to benefit patients with Alzheimer's disease. With the current state of diagnostic and treatment readiness, initial uptake will be somewhat limited, and we expect Donanemab to contribute only modestly to growth in 2024 once approved. Lebrikizumab, which last year was approved and launched in Europe under the brand name by our partner, received regulatory approval in Japan in January. As for the U.S., we look forward to the potential proof of Lebrikizumab by the end of the year. We believe the efficacy, safety and dosing of Lebrikizumab can make it a compelling option for patients and prescribers in a large and growing market for the treatment of moderate to severe atopic dermatitis. Given the expected timing of FDA regulatory action, we expect Lebrikizumab to contribute only modestly to revenue growth in 2024.\"" ], "metadata": { "id": "C23hcFw4qc06" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "Now, let's tokenize it." ], "metadata": { "id": "lXBqs8g9sPUz" } }, { "cell_type": "code", "source": [ "# Apply tokenizer and return pytorch tensors\n", "inputs = tokenizer(text, return_tensors=\"pt\", truncation=True)" ], "metadata": { "id": "R92u0QIXqeNj" }, "execution_count": null, "outputs": [] }, { "cell_type": "markdown", "source": [ "And now we apply the model." 
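, "\n\nAs a quick check first, note that the tokenizer truncated our long transcript to DistilBERT's 512-token limit:" ], "metadata": {} }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Inspect the tokenized input: a batch of one sequence with at most 512 token IDs.\n", "print(inputs['input_ids'].shape)\n", "# Decode the first few tokens, including the special [CLS] token.\n", "print(tokenizer.decode(inputs['input_ids'][0][:16]))" ] }, { "cell_type": "markdown", "source": [ "Now the forward pass. We wrap it in torch.no_grad() since we are only doing inference:"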
], "metadata": { "id": "QxWcYq0usdC7" } }, { "cell_type": "code", "source": [ "with torch.no_grad():\n", " logits = model(**inputs.to(\"cuda\")).logits" ], "metadata": { "id": "yCxv6MlWqyQx" }, "execution_count": null, "outputs": [] }, { "cell_type": "code", "source": [ "# Probabilities\n", "probs = nn.functional.softmax(logits, dim=1).cpu().numpy()\n", "print(probs)\n", "\n", "# Class\n", "print(f'The text is predicted to be of class {np.argmax(probs)}')" ], "metadata": { "id": "bja3L6gGspMA" }, "execution_count": null, "outputs": [] } ], "metadata": { "accelerator": "GPU", "colab": { "provenance": [], "include_colab_link": true }, "kernelspec": { "display_name": "Python 3 (ipykernel)", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.11.5" } }, "nbformat": 4, "nbformat_minor": 0 }