{-# LANGUAGE EmptyDataDecls , ForeignFunctionInterface , OverloadedStrings , UnicodeSyntax #-} {-# OPTIONS_HADDOCK prune #-} -- |An interface to manipulate documents of the HyperEstraier. module Text.HyperEstraier.Document ( -- * Types Document , DocumentID , ESTDOC -- private , wrapDoc -- private , withDocPtr -- private -- * Creating and parsing document , newDocument , parseDraft -- * Setting contents and attributes of document , addText , addHiddenText , setAttribute , setURI , setKeywords , setScore -- * Getting contents and attributes of document , getId , getAttrNames , getAttribute , getText , getURI , getKeywords , getScore -- * Dumping document , dumpDraft -- * Making snippet of document , makeSnippet ) where import Control.Monad import Control.Monad.Unicode import Data.Maybe import qualified Data.ByteString.Char8 as C8 import qualified Data.Text as T import Data.Text.Encoding import qualified Database.QDBM.Cabin.List as CL import qualified Database.QDBM.Cabin.Map as CM import Foreign.C.String import Foreign.C.Types import Foreign.ForeignPtr import Foreign.Ptr import Text.HyperEstraier.Utils import Network.URI import Prelude hiding (words) import Prelude.Unicode -- |'Document' is an opaque object representing a document of -- HyperEstraier. newtype Document = Document (ForeignPtr ESTDOC) data ESTDOC -- |'DocumentID' is just an alias to 'Prelude.Int'. It represents a -- document ID. type DocumentID = Int foreign import ccall unsafe "estraier.h est_doc_new" _new ∷ IO (Ptr ESTDOC) foreign import ccall unsafe "estraier.h est_doc_new_from_draft" _new_from_draft ∷ CString → IO (Ptr ESTDOC) foreign import ccall unsafe "estraier.h &est_doc_delete" _delete ∷ FunPtr (Ptr ESTDOC → IO ()) foreign import ccall unsafe "estraier.h est_doc_add_attr" _add_attr ∷ Ptr ESTDOC → CString → CString → IO () foreign import ccall unsafe "estraier.h est_doc_add_text" _add_text ∷ Ptr ESTDOC → CString → IO () foreign import ccall unsafe "estraier.h est_doc_add_hidden_text" _add_hidden_text ∷ Ptr ESTDOC → CString → IO () foreign import ccall unsafe "estraier.h est_doc_set_keywords" _set_keywords ∷ Ptr ESTDOC → Ptr CM.CBMAP → IO () foreign import ccall unsafe "estraier.h est_doc_set_score" _set_score ∷ Ptr ESTDOC → CInt → IO () foreign import ccall unsafe "estraier.h est_doc_id" _id ∷ Ptr ESTDOC → IO CInt foreign import ccall unsafe "estraier.h est_doc_attr_names" _attr_names ∷ Ptr ESTDOC → IO (Ptr CL.CBLIST) foreign import ccall unsafe "estraier.h est_doc_attr" _attr ∷ Ptr ESTDOC → CString → IO CString foreign import ccall unsafe "estraier.h est_doc_cat_texts" _cat_texts ∷ Ptr ESTDOC → IO CString foreign import ccall unsafe "estraier.h est_doc_keywords" _keywords ∷ Ptr ESTDOC → IO (Ptr CM.CBMAP) foreign import ccall unsafe "estraier.h est_doc_score" _score ∷ Ptr ESTDOC → IO CInt foreign import ccall unsafe "estraier.h est_doc_dump_draft" _dump_draft ∷ Ptr ESTDOC → IO CString foreign import ccall unsafe "estraier.h est_doc_make_snippet" _make_snippet ∷ Ptr ESTDOC → Ptr CL.CBLIST → CInt → CInt → CInt → IO CString wrapDoc ∷ Ptr ESTDOC → IO Document wrapDoc = fmap Document ∘ newForeignPtr _delete withDocPtr ∷ Document → (Ptr ESTDOC → IO a) → IO a withDocPtr (Document doc) = withForeignPtr doc -- |'newDocument' creates an empty document. newDocument ∷ IO Document newDocument = _new ≫= wrapDoc -- |'parseDraft' parses a document in the \"draft\" format. parseDraft ∷ T.Text → IO Document parseDraft draft = withUTF8CString draft $ \ draftPtr → _new_from_draft draftPtr ≫= wrapDoc -- |Set an attribute value of a document. setAttribute ∷ Document -- ^ The document. → T.Text -- ^ An attribute name. → Maybe T.Text -- ^ An attribute value. If this is -- 'Prelude.Nothing', the attribute will -- be deleted. → IO () setAttribute doc name value = withDocPtr doc $ \ docPtr → withUTF8CString name $ \ namePtr → withUTF8CString' value $ _add_attr docPtr namePtr -- |Add a block of text to a document. addText ∷ Document → T.Text → IO () addText doc text = withDocPtr doc $ \ docPtr → withUTF8CString text $ _add_text docPtr -- |Add a block of hidden text to a document. addHiddenText ∷ Document → T.Text → IO () addHiddenText doc text = withDocPtr doc $ \ docPtr → withUTF8CString text $ _add_hidden_text docPtr -- |Set an URI of a document. This is a special case of -- 'setAttribute'. setURI ∷ Document → Maybe URI → IO () setURI doc uri = setAttribute doc "@uri" (fmap uri2str uri) where uri2str = T.pack ∘ flip (uriToString id) "" -- |Set keywords of a document. setKeywords ∷ Document -- ^ The document. → [(T.Text, Integer)] -- ^ A list of @(keyword, score)@. → IO () setKeywords doc keywords = withDocPtr doc $ \ docPtr → withKeywordMapPtr $ _set_keywords docPtr where withKeywordMapPtr ∷ (Ptr CM.CBMAP → IO a) → IO a withKeywordMapPtr f = do m <- CM.fromList $ map encodeKeyword keywords CM.withMapPtr m f encodeKeyword (word, score) = (encodeUtf8 word, C8.pack $ show score) -- |Set an alternative score of a document. setScore ∷ Document → Maybe Int → IO () setScore doc = withDocPtr doc ∘ flip _set_score ∘ fromIntegral ∘ fromMaybe (-1) -- |Get the ID of document. getId ∷ Document → IO DocumentID getId = liftM fromIntegral ∘ flip withDocPtr _id -- |Get a list of all attribute names in a document. getAttrNames ∷ Document → IO [T.Text] getAttrNames doc = withDocPtr doc $ \ docPtr → _attr_names docPtr ≫= CL.wrapList ≫= CL.toList ≫= return ∘ map decodeUtf8 -- |Get an attribute value of a document. getAttribute ∷ Document → T.Text → IO (Maybe T.Text) getAttribute doc name = withDocPtr doc $ \ docPtr → withUTF8CString name $ \ namePtr → do valuePtr <- _attr docPtr namePtr if valuePtr == nullPtr then return Nothing else fmap Just (peekUTF8CString valuePtr) -- |Get the text in a document. getText ∷ Document → IO T.Text getText doc = withDocPtr doc $ \ docPtr → _cat_texts docPtr ≫= packMallocUTF8CString -- |Get the URI of a document. getURI ∷ Document → IO (Maybe URI) getURI doc = fmap (fmap parse) (getAttribute doc "@uri") where parse ∷ T.Text → URI parse = fromJust ∘ parseURI ∘ T.unpack -- |Get the keywords of a document. getKeywords ∷ Document → IO [(T.Text, Integer)] getKeywords doc = withDocPtr doc $ \ docPtr → _keywords docPtr ≫= CM.unsafePeekMap ≫= CM.toList ≫= return ∘ map decodeKeyword where decodeKeyword (word, score) = (decodeUtf8 word, read $ C8.unpack score) -- |Get an alternative score of a document. getScore ∷ Document → IO (Maybe Int) getScore doc = withDocPtr doc $ \ docPtr → _score docPtr ≫= \ n → case n of -1 → return Nothing _ → return $ Just $ fromIntegral n -- |Dump a document in the \"draft\" format. dumpDraft ∷ Document → IO T.Text dumpDraft doc = withDocPtr doc $ \ docPtr → _dump_draft docPtr ≫= packMallocUTF8CString -- |Make a snippet from a document. makeSnippet ∷ Document -- ^ The document. → [T.Text] -- ^ Words to be highlighted. → Int -- ^ Maximum width of the whole result. → Int -- ^ Width of the heading text to be shown. → Int -- ^ Width of the text surrounding each highlighted words. → IO [Either T.Text (T.Text, T.Text)] -- ^ A list of either -- @('Prelude.Left' -- non-highlighted text)@ -- or @('Prelude.Right' -- (highlighted word, its -- normalized form))@. makeSnippet doc words wwidth hwidth awidth = do wordsList <- CL.fromList $ map encodeUtf8 words withDocPtr doc $ \ docPtr → CL.withListPtr wordsList $ \ wordsPtr → _make_snippet docPtr wordsPtr (fromIntegral wwidth) (fromIntegral hwidth) (fromIntegral awidth) ≫= packMallocUTF8CString ≫= return ∘ parseSnippet where parseSnippet ∷ T.Text → [Either T.Text (T.Text, T.Text)] parseSnippet = map parseLine ∘ T.lines parseLine ∷ T.Text → Either T.Text (T.Text, T.Text) parseLine line = case T.break (== '\t') line of (x, y) | T.null y → Left x | otherwise → Right (x, T.tail y)