Skip to content

Commit

Permalink
switch to using hashes from Data.Hashable
Browse files Browse the repository at this point in the history
The only thing one might worry about is more probable hash
collisions (since hashable uses Int values for hashes, we only get 64
(or 32) bit hashes as opposed to the 128 bits we were getting with MD5.
I have included a comment giving some rough ballpark estimates for the
probability of collisions.  I think in practice it is not a concern,
except perhaps when building thousands of diagrams simultaneously on a
32-bit system.
  • Loading branch information
Brent Yorgey committed Nov 28, 2013
1 parent efe65a8 commit c93a65b
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 109 deletions.
5 changes: 2 additions & 3 deletions diagrams-builder.cabal
Expand Up @@ -56,10 +56,9 @@ library
transformers >= 0.3 && < 0.4,
split >= 0.2 && < 0.3,
haskell-src-exts >= 1.13.1 && < 1.15,
cryptohash >= 0.8 && < 0.12,
bytestring >= 0.9.2 && < 0.11,
cmdargs >= 0.6 && < 0.11,
lens >= 3.9 && < 3.11
lens >= 3.9 && < 3.11,
hashable >= 1.2 && < 1.3
hs-source-dirs: src
default-language: Haskell2010
other-extensions: StandaloneDeriving,
Expand Down
100 changes: 28 additions & 72 deletions src/Diagrams/Builder.hs
Expand Up @@ -23,13 +23,13 @@ module Diagrams.Builder
-- ** Options
BuildOpts(..), mkBuildOpts, backendOpts, snippets, pragmas, imports, decideRegen, diaExpr, postProcess

-- ** Regeneration decision functions
, alwaysRegenerate, hashedRegenerate

-- ** Building
, buildDiagram, BuildResult(..)
, ppInterpError

-- ** Regeneration decision functions
, alwaysRegenerate, hashedRegenerate

-- * Interpreting diagrams
-- $interp
, setDiagramImports
Expand All @@ -46,16 +46,12 @@ import Control.Lens ((^.))
import Control.Monad (guard, mplus, mzero)
import Control.Monad.Error (catchError)
import Control.Monad.Trans.Maybe (MaybeT, runMaybeT)
import Crypto.Hash (Digest, MD5,
digestToHexByteString,
hash)
import qualified Data.ByteString.Char8 as B
import Data.List (nub)
import Data.Hashable (Hashable (..))
import Data.List (foldl', nub)
import Data.List.Split (splitOn)
import Data.Maybe (catMaybes, fromMaybe)
import Data.Typeable (Typeable)
import System.Directory (doesFileExist,
getDirectoryContents,
getTemporaryDirectory,
removeFile)
import System.FilePath (takeBaseName, (<.>),
Expand Down Expand Up @@ -121,11 +117,11 @@ getHsenvArgv = do
-- source file, using some backend to produce a result. The
-- expression can be of type @Diagram b v@ or @IO (Diagram b v)@.
interpretDiagram
:: forall b v x.
:: forall b v.
( Typeable b, Typeable v
, InnerSpace v, OrderedField (Scalar v), Backend b v
)
=> BuildOpts b v x
=> BuildOpts b v
-> FilePath
-> IO (Either InterpreterError (Result b v))
interpretDiagram bopts m = do
Expand Down Expand Up @@ -158,15 +154,14 @@ ppInterpError (GhcException err) = "GhcException: " ++ err
------------------------------------------------------------

-- | Potential results of a dynamic diagram building operation.
data BuildResult b v x =
data BuildResult b v =
ParseErr String -- ^ Parsing of the code failed.
| InterpErr InterpreterError -- ^ Interpreting the code
-- failed. See 'ppInterpError'.
| Skipped x -- ^ This diagram did not need to be
-- regenerated.
| OK x (Result b v) -- ^ A successful build, yielding a
-- backend-specific result and
-- some extra information.
| Skipped Hash -- ^ This diagram did not need to be
-- regenerated; includes the hash.
| OK Hash (Result b v) -- ^ A successful build, yielding the
-- hash and a backend-specific result.

-- | Build a diagram by writing the given source code to a temporary
-- module and interpreting the given expression, which can be of
Expand All @@ -176,9 +171,9 @@ data BuildResult b v x =
buildDiagram
:: ( Typeable b, Typeable v
, InnerSpace v, OrderedField (Scalar v), Backend b v
, Show (Options b v)
, Hashable (Options b v)
)
=> BuildOpts b v x -> IO (BuildResult b v x)
=> BuildOpts b v -> IO (BuildResult b v)
buildDiagram bopts = do
let bopts' = bopts
& snippets %~ map unLit
Expand All @@ -187,11 +182,16 @@ buildDiagram bopts = do
case createModule Nothing bopts' of
Left err -> return (ParseErr err)
Right m@(Module _ _ _ _ _ srcImps _) -> do
liHashes <- getLocalImportHashes srcImps
regen <- (bopts ^. decideRegen) (prettyPrint m ++ (bopts ^. diaExpr) ++ show (bopts ^. backendOpts) ++ concat liHashes)
liHash <- hashLocalImports srcImps
let diaHash
= 0 `hashWithSalt` prettyPrint m
`hashWithSalt` (bopts ^. diaExpr)
`hashWithSalt` (bopts ^. backendOpts)
`hashWithSalt` liHash
regen <- (bopts ^. decideRegen) diaHash
case regen of
(info, Nothing) -> return $ Skipped info
(info, Just upd) -> do
Nothing -> return $ Skipped diaHash
Just upd -> do
tmpDir <- getTemporaryDirectory
(tmp, h) <- openTempFile tmpDir ("Diagram.hs")
let m' = replaceModuleName (takeBaseName tmp) m
Expand All @@ -200,17 +200,16 @@ buildDiagram bopts = do

compilation <- interpretDiagram (bopts' & backendOpts %~ upd) tmp
removeFile tmp
return $ either InterpErr (OK info) compilation
return $ either InterpErr (OK diaHash) compilation

-- | Take a list of imports, and return hashes of the contents of
-- | Take a list of imports, and return a hash of the contents of
-- those imports which are local. Note, this only finds imports
-- which exist relative to the current directory, which is not as
-- general as it probably should be --- we could be calling
-- 'buildDiagram' on source code which lives anywhere.
getLocalImportHashes :: [ImportDecl] -> IO [String]
getLocalImportHashes
= (fmap . map) hashStr
. fmap catMaybes
hashLocalImports :: [ImportDecl] -> IO Hash
hashLocalImports
= fmap (foldl' hashWithSalt 0 . catMaybes)
. mapM getLocalSource
. map (foldr1 (</>) . splitOn "." . getModuleName . importModule)

Expand All @@ -234,46 +233,3 @@ getLocal m = tryExt "hs" `mplus` tryExt "lhs"
tryExt ext = do
let f = m <.> ext
liftIO (doesFileExist f) >>= guard >> liftIO (readFile f)

-- | Convenience function suitable to be given as the final argument
-- to 'buildDiagram'. It implements the simple policy of always
-- rebuilding every diagram.
alwaysRegenerate :: String -> IO ((), Maybe (a -> a))
alwaysRegenerate _ = return ((), Just id)

-- | Convenience function suitable to be given as the final argument
-- to 'buildDiagram'. It works by hashing the given diagram source,
-- and looking in the specified directory for any file whose base
-- name is equal to the hash. If there is such a file, it specifies
-- that the diagram should not be rebuilt. Otherwise, it specifies
-- that the diagram should be rebuilt, and uses the provided
-- function to update the rendering options based on the generated
-- hash. (Most likely, one would want to set the requested output
-- file to the hash followed by some extension.) It also returns
-- the generated hash.
hashedRegenerate
:: (String -> a -> a)
-- ^ A function for computing an update to rendering options,
-- given a new base filename computed from a hash of the
-- diagram source.

-> FilePath
-- ^ The directory in which to look for generated files

-> String
-- ^ The \"source\" to hash. Note that this does not actually
-- have to be valid source code. A common trick is to
-- concatenate the actual source code with String representations
-- of any other information on which the diagram depends.

-> IO (String, Maybe (a -> a))

hashedRegenerate upd d src = do
let fileBase = hashStr src
files <- getDirectoryContents d
case any ((fileBase==) . takeBaseName) files of
True -> return (fileBase, Nothing)
False -> return (fileBase, Just (upd fileBase))

hashStr :: String -> String
hashStr = B.unpack . digestToHexByteString . (hash :: B.ByteString -> Digest MD5) . B.pack
2 changes: 1 addition & 1 deletion src/Diagrams/Builder/Modules.hs
Expand Up @@ -35,7 +35,7 @@ import Diagrams.Builder.Opts
-- Returns the updated module, or an error message if parsing
-- failed.
createModule :: Maybe String -- ^ Module name to use
-> BuildOpts b v x
-> BuildOpts b v
-> Either String Module
createModule nm opts = do
ms <- mapM doModuleParse (opts ^. snippets)
Expand Down
127 changes: 94 additions & 33 deletions src/Diagrams/Builder/Opts.hs
Expand Up @@ -17,16 +17,36 @@
--
-----------------------------------------------------------------------------
module Diagrams.Builder.Opts
(
BuildOpts(..), mkBuildOpts, backendOpts, snippets, pragmas, imports, decideRegen, diaExpr, postProcess
( -- * Options

Hash
, BuildOpts(..), mkBuildOpts, backendOpts, snippets, pragmas, imports, decideRegen, diaExpr, postProcess

-- * Rebuilding

, alwaysRegenerate, hashedRegenerate, hashToHexStr
)
where

import Control.Lens (Lens, Lens', generateSignatures, lensRules,
import Control.Lens (Lens', generateSignatures, lensRules,
makeLensesWith, (&), (.~))
import System.Directory (getDirectoryContents)
import System.FilePath (takeBaseName)
import Text.Printf

import Diagrams.Prelude (Diagram, Options)

-- | Synonym for more perspicuous types.
--
-- We use @Int@ values for hashes because that's what the @Hashable@
-- package uses. Assuming diagram hashes are uniformly distributed,
-- on a 64-bit system one needs to build on the order of billions of
-- diagrams before the probability of a hash collision exceeds 1/2,
-- and for anything up to tens of millions of diagrams the
-- probability of a collision is under 0.1%. On 32-bit systems
-- those become tens of thousands and thousands, respectively.
type Hash = Int

-- | Options to control the behavior of @buildDiagram@. Create one
-- with 'mkBuildOpts' followed by using the provided lenses to
-- override more fields; for example,
Expand All @@ -36,7 +56,7 @@ import Diagrams.Prelude (Diagram, Options)
-- & imports .~ [\"Foo.Bar\", \"Baz.Quux\"]
-- & diaExpr .~ \"square 6 # fc green\"
-- @
data BuildOpts b v x
data BuildOpts b v
= BuildOpts
{ backendToken :: b
-- ^ Backend token
Expand All @@ -46,11 +66,13 @@ data BuildOpts b v x
, _snippets :: [String]
, _pragmas :: [String]
, _imports :: [String]
, _decideRegen :: String -> IO (x, Maybe (Options b v -> Options b v))
, _decideRegen :: Hash -> IO (Maybe (Options b v -> Options b v))
, _diaExpr :: String
, _postProcess :: Diagram b v -> Diagram b v
}

makeLensesWith (lensRules & generateSignatures .~ False) ''BuildOpts

-- | Create a @BuildOpts@ record with default options:
--
-- * no snippets
Expand All @@ -64,63 +86,102 @@ data BuildOpts b v x
-- * the diagram expression @circle 1@
--
-- * no postprocessing
mkBuildOpts :: b -> v -> Options b v -> BuildOpts b v ()
mkBuildOpts :: b -> v -> Options b v -> BuildOpts b v
mkBuildOpts b v opts
= BuildOpts b v opts [] [] [] (const (return ((), Just id))) "circle 1" id

makeLensesWith (lensRules & generateSignatures .~ False) ''BuildOpts
= BuildOpts b v opts [] [] [] alwaysRegenerate "circle 1" id

-- | Backend-specific options to use.
backendOpts :: Lens' (BuildOpts b v x) (Options b v)
backendOpts :: Lens' (BuildOpts b v) (Options b v)

-- | Source code snippets. Each should be a syntactically valid
-- Haskell module. They will be combined intelligently, /i.e./
-- not just pasted together textually but combining pragmas,
-- imports, /etc./ separately.
snippets :: Lens' (BuildOpts b v x) [String]
snippets :: Lens' (BuildOpts b v) [String]

-- | Extra @LANGUAGE@ pragmas to use (@NoMonomorphismRestriction@
-- is automatically enabled.)
pragmas :: Lens' (BuildOpts b v x) [String]
pragmas :: Lens' (BuildOpts b v) [String]

-- | Additional module imports (note that "Diagrams.Prelude" is
-- automatically imported).
imports :: Lens' (BuildOpts b v x) [String]

-- | A function to decide whether a particular diagram needs to
-- be regenerated. It will be passed the final assembled
-- source for the diagram (but with the module name set to
-- @Main@ instead of something auto-generated, so that hashing
-- the source will produce consistent results across runs). It
-- can return some information (such as a hash of the source)
-- via the @x@ result, which will be passed through to the
-- result of 'buildDiagram'. More importantly, it decides
-- whether the diagram should be built: a result of 'Just'
-- means the diagram /should/ be built; 'Nothing' means it
-- should not. In the case that it should be built, it returns
-- a function for updating the rendering options. This could
-- be used, /e.g./, to request a filename based on a hash of
-- the source.
imports :: Lens' (BuildOpts b v) [String]

-- | A function to decide whether a particular diagram needs to be
-- regenerated. It will be passed a hash of the final assembled
-- source for the diagram (but with the module name set to @Main@
-- instead of something auto-generated, so that hashing the source
-- will produce consistent results across runs), plus any options,
-- local imports, and other things which could affect the result of
-- rendering. It can return some information (such as a hash of the
-- source) via the @x@ result, which will be passed through to the
-- result of 'buildDiagram'. More importantly, it decides whether
-- the diagram should be built: a result of 'Just' means the diagram
-- /should/ be built; 'Nothing' means it should not. In the case
-- that it should be built, it returns a function for updating the
-- rendering options. This could be used, /e.g./, to request a
-- filename based on a hash of the source.
--
-- Two standard decision functions are provided for
-- convenience: 'alwaysRegenerate' returns no extra information
-- and always decides to regenerate the diagram;
-- 'hashedRegenerate' creates a hash of the diagram source and
-- looks for a file with that name in a given directory.
decideRegen :: Lens (BuildOpts b v x) (BuildOpts b v x')
(String -> IO (x, Maybe (Options b v -> Options b v)))
(String -> IO (x', Maybe (Options b v -> Options b v)))
decideRegen :: Lens' (BuildOpts b v) (Hash -> IO (Maybe (Options b v -> Options b v)))

-- | The diagram expression to interpret. All the given import sand
-- snippets will be in scope, with the given LANGUAGE pragmas
-- enabled. The expression may have either of the types @Diagram b
-- v@ or @IO (Diagram b v)@.
diaExpr :: Lens' (BuildOpts b v x) String
diaExpr :: Lens' (BuildOpts b v) String

-- | A function to apply to the interpreted diagram prior to
-- rendering. For example, you might wish to apply @pad 1.1
-- . centerXY@. This is preferred over directly modifying the
-- string expression to be interpreted, since it gives better
-- typechecking, and works no matter whether the expression
-- represents a diagram or an IO action.
postProcess :: Lens' (BuildOpts b v x) (Diagram b v -> Diagram b v)
postProcess :: Lens' (BuildOpts b v) (Diagram b v -> Diagram b v)

-- | Convenience function suitable to be given as the final argument
-- to 'buildDiagram'. It implements the simple policy of always
-- rebuilding every diagram.
alwaysRegenerate :: Hash -> IO (Maybe (a -> a))
alwaysRegenerate _ = return (Just id)

-- | Convenience function suitable to be given as the final argument
-- to 'buildDiagram'. It works by converting the hash value to a
-- zero-padded hexadecimal string and looking in the specified
-- directory for any file whose base name is equal to the hash. If
-- there is such a file, it specifies that the diagram should not be
-- rebuilt. Otherwise, it specifies that the diagram should be
-- rebuilt, and uses the provided function to update the rendering
-- options based on the generated hash string. (Most likely, one
-- would want to set the requested output file to the hash followed
-- by some extension.)
hashedRegenerate
:: (String -> a -> a)
-- ^ A function for computing an update to rendering options,
-- given a new base filename computed from a hash of the
-- diagram source.

-> FilePath
-- ^ The directory in which to look for generated files

-> Hash
-- ^ The hash

-> IO (Maybe (a -> a))

hashedRegenerate upd d hash = do
let fileBase = hashToHexStr hash
files <- getDirectoryContents d
case any ((fileBase==) . takeBaseName) files of
True -> return Nothing
False -> return $ Just (upd fileBase)

hashToHexStr :: Hash -> String
hashToHexStr n = printf "%016x" n'
where
n' :: Integer
n' = fromIntegral n - fromIntegral (minBound :: Int)

0 comments on commit c93a65b

Please sign in to comment.