Skip to content
Permalink

Comparing changes

Choose two branches to see what’s changed or to start a new pull request. If you need to, you can also compare across forks or learn more about diff comparisons.

Open a pull request

Create a new pull request by comparing changes across two branches. If you need to, you can also compare across forks. Learn more about diff comparisons here.
base repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected base ref is valid, then try again.
Loading
base: 5c737382f3d1
Choose a base ref
...
head repository: NixOS/nixpkgs
Failed to load repositories. Confirm that selected head ref is valid, then try again.
Loading
compare: 8712a719c20e
Choose a head ref
  • 2 commits
  • 2 files changed
  • 1 contributor

Commits on Nov 10, 2020

  1. Copy the full SHA
    c67382b View commit details
  2. python3Packages.transformers: relax tokenizers version bound

    danieldk authored and Jonathan Ringer committed Nov 10, 2020
    Copy the full SHA
    8712a71 View commit details
Showing with 18 additions and 3 deletions.
  1. +13 −3 pkgs/development/python-modules/tokenizers/default.nix
  2. +5 −0 pkgs/development/python-modules/transformers/default.nix
16 changes: 13 additions & 3 deletions pkgs/development/python-modules/tokenizers/default.nix
Original file line number Diff line number Diff line change
@@ -32,6 +32,14 @@ let
url = "https://norvig.com/big.txt";
sha256 = "0yz80icdly7na03cfpl0nfk5h3j3cam55rj486n03wph81ynq1ps";
};
docPipelineTokenizer = fetchurl {
url = "https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-pipeline/tokenizer.json";
hash = "sha256-i533xC8J5CDMNxBjo+p6avIM8UOcui8RmGAmK0GmfBc=";
};
docQuicktourTokenizer = fetchurl {
url = "https://s3.amazonaws.com/models.huggingface.co/bert/anthony/doc-quicktour/tokenizer.json";
hash = "sha256-ipY9d5DR5nxoO6kj7rItueZ9AO5wq9+Nzr6GuEIfIBI=";
};
openaiVocab = fetchurl {
url = "https://s3.amazonaws.com/models.huggingface.co/bert/openai-gpt-vocab.json";
sha256 = "0y40gc9bixj5rxv674br1rxmxkd3ly29p80x1596h8yywwcrpx7x";
@@ -42,16 +50,16 @@ let
};
in rustPlatform.buildRustPackage rec {
pname = "tokenizers";
version = "0.9.2";
version = "0.9.4";

src = fetchFromGitHub {
owner = "huggingface";
repo = pname;
rev = "python-v${version}";
sha256 = "0rsm1g5zfq3ygdb3s8v9xqqpgfzvvkc4n5ik3ahy8sw7pyjljb4m";
hash = "sha256-JXoH9yfhMIFg5qDY5zrF6iWb7XKugjMfk1NxSizfaWg=";
};

cargoSha256 = "0yn699dq9hdjh7fyci99ni8mmd5qdhzrsi80grzgf5cch8g38rbi";
cargoSha256 = "sha256-u9qitrOxJSABs0VjwHUZgmw7VTQXNbp6l8fKKE/RQ7M=";

sourceRoot = "source/bindings/python";

@@ -82,6 +90,8 @@ in rustPlatform.buildRustPackage rec {
ln -s ${robertaMerges} roberta-base-merges.txt
ln -s ${albertVocab} albert-base-v1-tokenizer.json
ln -s ${bertVocab} bert-base-uncased-vocab.txt
ln -s ${docPipelineTokenizer} bert-wiki.json
ln -s ${docQuicktourTokenizer} tokenizer-wiki.json
ln -s ${norvigBig} big.txt
ln -s ${openaiVocab} openai-gpt-vocab.json
ln -s ${openaiMerges} openai-gpt-merges.txt )
5 changes: 5 additions & 0 deletions pkgs/development/python-modules/transformers/default.nix
Original file line number Diff line number Diff line change
@@ -45,6 +45,11 @@ buildPythonPackage rec {
timeout-decorator
];

postPatch = ''
substituteInPlace setup.py \
--replace "tokenizers == 0.9.2" "tokenizers"
'';

preCheck = ''
export HOME="$TMPDIR"