% Licensed under the Apache License, Version 2.0 (the "License"); you may not
% use this file except in compliance with the License. You may obtain a copy of
% the License at
%
%   http://www.apache.org/licenses/LICENSE-2.0
%
% Unless required by applicable law or agreed to in writing, software
% distributed under the License is distributed on an "AS IS" BASIS, WITHOUT
% WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the
% License for the specific language governing permissions and limitations under
% the License.

-module(couch_db).
-behaviour(gen_server).

-export([open/2,create/2,create/3,get_doc_info/2,start_compact/1]).
-export([save_docs/2, save_docs/3, get_db_info/1, update_doc/3, update_docs/2, update_docs/3]).
-export([delete_doc/3,open_doc/2,open_doc/3,enum_docs_since/4,enum_docs_since/5]).
-export([enum_docs/4,enum_docs/5, open_doc_revs/4, get_missing_revs/2]).
-export([enum_docs_since_reduce_to_count/1,enum_docs_reduce_to_count/1]).
-export([start_update_loop/2]).
-export([init/1,terminate/2,handle_call/3,handle_cast/2,code_change/3,handle_info/2]).
-export([start_copy_compact_int/2]).
-export([btree_by_id_split/1, btree_by_id_join/2, btree_by_id_reduce/2,
    btree_by_seq_split/1, btree_by_seq_join/2, btree_by_seq_reduce/2]).

-include("couch_db.hrl").

-record(db_header,
    {write_version = 0,
     update_seq = 0,
     summary_stream_state = nil,
     fulldocinfo_by_id_btree_state = nil,
     docinfo_by_seq_btree_state = nil,
     local_docs_btree_state = nil,
     doc_count = 0,
     doc_del_count = 0
    }).

-record(db,
    {main_pid = nil,
     update_pid = nil,
     compactor_pid = nil,
     fd,
     header = #db_header{},
     summary_stream,
     fulldocinfo_by_id_btree,
     docinfo_by_seq_btree,
     local_docs_btree,
     update_seq,
     doc_count,
     doc_del_count,
     name,
     filepath
    }).

% small value used in revision trees to indicate the revision isn't stored
-define(REV_MISSING, []).

start_link(DbName, Filepath, Options) ->
    catch start_link0(DbName, Filepath, Options).

start_link0(DbName, Filepath, Options) ->
    % open the existing file, falling back to any compaction file left over
    % from a previous compaction
    Fd =
    case couch_file:open(Filepath, Options) of
    {ok, Fd0} ->
        Fd0;
    {error, enoent} ->
        % couldn't find the file. is there a compact version? This can happen
        % if we crashed during the file switch at the end of compaction.
        case couch_file:open(Filepath ++ ".compact") of
        {ok, Fd0} ->
            ?LOG_INFO("Found ~s~s compaction file, using as primary storage.",
                    [Filepath, ".compact"]),
            ok = file:rename(Filepath ++ ".compact", Filepath),
            Fd0;
        {error, enoent} ->
            throw({error, not_found})
        end;
    Else ->
        throw(Else)
    end,
    StartResult = gen_server:start_link(couch_db, {DbName, Filepath, Fd, Options}, []),
    unlink(Fd),
    case StartResult of
    {ok, _} ->
        % We successfully opened the db, delete old storage files if around
        case file:delete(Filepath ++ ".old") of
        ok ->
            ?LOG_INFO("Deleted old storage file ~s~s", [Filepath, ".old"]);
        {error, enoent} ->
            ok % normal result
        end;
    _ ->
        ok
    end,
    StartResult.

%%% Interface functions %%%

create(Filepath, Options) ->
    create(Filepath, Filepath, Options).

create(DbName, Filepath, Options) when is_list(Options) ->
    start_link(DbName, Filepath, [create | Options]).

open(DbName, Filepath) ->
    start_link(DbName, Filepath, []).

% Compaction still needs work. Right now readers and writers can get an error
% during the file changeover at the end of compaction. This doesn't need to be
% the case.
start_compact(MainPid) ->
    gen_server:cast(MainPid, start_compact).

delete_doc(MainPid, Id, Revisions) ->
    DeletedDocs = [#doc{id=Id, revs=[Rev], deleted=true} || Rev <- Revisions],
    {ok, [Result]} = update_docs(MainPid, DeletedDocs, []),
    {ok, Result}.
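% Illustrative usage of the interface above (a sketch only, not taken from
% this module's callers; the db name, filepath, doc id, and body term are all
% hypothetical, and the body must be whatever term representation couch_doc
% expects):
%
%     {ok, MainPid} = couch_db:create("demo", "/tmp/demo.couch", []),
%     Doc = #doc{id="some_doc_id", body=SomeBodyTerm},
%     {ok, NewRev} = couch_db:update_doc(MainPid, Doc, []),
%     {ok, _Doc2} = couch_db:open_doc(MainPid, "some_doc_id"),
%     {ok, _DelRev} = couch_db:delete_doc(MainPid, "some_doc_id", [NewRev]).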
open_doc(MainPid, IdOrDocInfo) ->
    open_doc(MainPid, IdOrDocInfo, []).

open_doc(MainPid, Id, Options) ->
    case open_doc_int(get_db(MainPid), Id, Options) of
    {ok, #doc{deleted=true}=Doc} ->
        case lists:member(deleted, Options) of
        true ->
            {ok, Doc};
        false ->
            {not_found, deleted}
        end;
    Else ->
        Else
    end.

open_doc_revs(MainPid, Id, Revs, Options) ->
    [Result] = open_doc_revs_int(get_db(MainPid), [{Id, Revs}], Options),
    Result.

get_missing_revs(MainPid, IdRevsList) ->
    Ids = [Id1 || {Id1, _Revs} <- IdRevsList],
    FullDocInfoResults = get_full_doc_infos(MainPid, Ids),
    Results = lists:zipwith(
        fun({Id, Revs}, FullDocInfoResult) ->
            case FullDocInfoResult of
            {ok, #full_doc_info{rev_tree=RevisionTree}} ->
                {Id, couch_key_tree:find_missing(RevisionTree, Revs)};
            not_found ->
                {Id, Revs}
            end
        end, IdRevsList, FullDocInfoResults),
    % strip out the non-missing ids
    Missing = [{Id, Revs} || {Id, Revs} <- Results, Revs /= []],
    {ok, Missing}.

get_doc_info(Db, Id) ->
    case get_full_doc_info(Db, Id) of
    {ok, DocInfo} ->
        {ok, couch_doc:to_doc_info(DocInfo)};
    Else ->
        Else
    end.

% returns {ok, DocInfo} or not_found
get_full_doc_info(Db, Id) ->
    [Result] = get_full_doc_infos(Db, [Id]),
    Result.

get_full_doc_infos(MainPid, Ids) when is_pid(MainPid) ->
    get_full_doc_infos(get_db(MainPid), Ids);
get_full_doc_infos(#db{}=Db, Ids) ->
    couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids).

get_db_info(MainPid) when is_pid(MainPid) ->
    get_db_info(get_db(MainPid));
get_db_info(Db) ->
    #db{fd=Fd,
        compactor_pid=Compactor,
        doc_count=Count,
        doc_del_count=DelCount,
        update_seq=SeqNum} = Db,
    {ok, Size} = couch_file:bytes(Fd),
    InfoList = [
        {doc_count, Count},
        {doc_del_count, DelCount},
        {update_seq, SeqNum},
        {compact_running, Compactor /= nil},
        {disk_size, Size}
        ],
    {ok, InfoList}.

update_doc(MainPid, Doc, Options) ->
    {ok, [NewRev]} = update_docs(MainPid, [Doc], Options),
    {ok, NewRev}.

update_docs(MainPid, Docs) ->
    update_docs(MainPid, Docs, []).

% group_alike_docs groups the sorted documents into sublist buckets, by id.
% ([DocA, DocA, DocB, DocC], []) -> [[DocA, DocA], [DocB], [DocC]]
group_alike_docs(Docs) ->
    Sorted = lists:sort(fun(#doc{id=A}, #doc{id=B}) -> A < B end, Docs),
    group_alike_docs(Sorted, []).

group_alike_docs([], Buckets) ->
    lists:reverse(Buckets);
group_alike_docs([Doc|Rest], []) ->
    group_alike_docs(Rest, [[Doc]]);
group_alike_docs([Doc|Rest], [Bucket|RestBuckets]) ->
    [#doc{id=BucketId}|_] = Bucket,
    case Doc#doc.id == BucketId of
    true ->
        % add to existing bucket
        group_alike_docs(Rest, [[Doc|Bucket]|RestBuckets]);
    false ->
        % add to new bucket
        group_alike_docs(Rest, [[Doc]|[Bucket|RestBuckets]])
    end.

prepare_doc_for_new_edit(Db, #doc{id=Id,revs=[NewRev|PrevRevs]}=Doc,
        OldFullDocInfo, LeafRevsDict) ->
    case PrevRevs of
    [PrevRev|_] ->
        case dict:find(PrevRev, LeafRevsDict) of
        {ok, {Deleted, Sp, DiskRevs}} ->
            case couch_doc:has_stubs(Doc) of
            true ->
                DiskDoc = make_doc(Db, Id, Deleted, Sp, DiskRevs),
                Doc2 = couch_doc:merge_stubs(Doc, DiskDoc),
                Doc2#doc{revs=[NewRev|DiskRevs]};
            false ->
                Doc#doc{revs=[NewRev|DiskRevs]}
            end;
        error ->
            throw(conflict)
        end;
    [] ->
        % the edit specifies no previous revs, but the doc already exists.
        OldDocInfo = couch_doc:to_doc_info(OldFullDocInfo),
        if OldDocInfo#doc_info.deleted ->
            % existing doc is a deletion;
            % allow this new doc to be a later revision.
            {_Deleted, _Sp, Revs} =
                dict:fetch(OldDocInfo#doc_info.rev, LeafRevsDict),
            Doc#doc{revs=[NewRev|Revs]};
        true ->
            throw(conflict)
        end
    end.

update_docs(MainPid, Docs, Options) ->
    % go ahead and generate the new revision ids for the documents.
    Docs2 = lists:map(
        fun(#doc{id=Id,revs=Revs}=Doc) ->
            case Id of
            ?LOCAL_DOC_PREFIX ++ _ ->
                Rev =
                case Revs of
                [] -> 0;
                [Rev0|_] -> list_to_integer(Rev0)
                end,
                Doc#doc{revs=[integer_to_list(Rev + 1)]};
            _ ->
                Doc#doc{revs=[integer_to_list(couch_util:rand32()) | Revs]}
            end
        end, Docs),
    NewRevs = [NewRev || #doc{revs=[NewRev|_]} <- Docs2],
    DocBuckets = group_alike_docs(Docs2),
    Ids = [Id || [#doc{id=Id}|_] <- DocBuckets],
    Db = get_db(MainPid),

    % lookup the doc by id and get the most recent
    ExistingDocs = get_full_doc_infos(Db, Ids),

    DocBuckets2 = lists:zipwith(
        fun(Bucket, not_found) ->
            % no existing revs, make sure no old revision is specified.
            [throw(conflict) || #doc{revs=[_NewRev, _OldRev | _]} <- Bucket],
            Bucket;
        (Bucket, {ok, #full_doc_info{rev_tree=OldRevTree}=OldFullDocInfo}) ->
            Leafs = couch_key_tree:get_all_leafs(OldRevTree),
            LeafRevsDict = dict:from_list([{Rev, {Deleted, Sp, Revs}} ||
                    {Rev, {Deleted, Sp}, Revs} <- Leafs]),
            [prepare_doc_for_new_edit(Db, Doc, OldFullDocInfo, LeafRevsDict)
                    || Doc <- Bucket]
        end, DocBuckets, ExistingDocs),

    % flush unwritten binaries to disk.
    DocBuckets3 = [[doc_flush_binaries(Doc, Db#db.fd) || Doc <- Bucket]
            || Bucket <- DocBuckets2],

    case gen_server:call(MainPid, {update_docs, DocBuckets3, [new_edits | Options]}) of
    ok ->
        {ok, NewRevs};
    retry ->
        Db2 = get_db(MainPid),
        DocBuckets4 = [[doc_flush_binaries(Doc, Db2#db.fd) || Doc <- Bucket]
                || Bucket <- DocBuckets3],
        % We only retry once
        case gen_server:call(MainPid, {update_docs, DocBuckets4, [new_edits | Options]}) of
        ok -> {ok, NewRevs};
        Else -> throw(Else)
        end;
    Else ->
        throw(Else)
    end.

save_docs(MainPid, Docs) ->
    save_docs(MainPid, Docs, []).

save_docs(MainPid, Docs, Options) ->
    % flush unwritten binaries to disk.
    Db = get_db(MainPid),
    DocBuckets = group_alike_docs(Docs),
    DocBuckets2 = [[doc_flush_binaries(Doc, Db#db.fd) || Doc <- Bucket]
            || Bucket <- DocBuckets],
    ok = gen_server:call(MainPid, {update_docs, DocBuckets2, Options}).

doc_flush_binaries(Doc, Fd) ->
    % calc size of binaries to write out
    Bins = Doc#doc.attachments,
    PreAllocSize = lists:foldl(
        fun(BinValue, SizeAcc) ->
            case BinValue of
            {_Key, {_Type, {Fd0, _StreamPointer, _Len}}} when Fd0 == Fd ->
                % already written to our file, nothing to write
                SizeAcc;
            {_Key, {_Type, {_OtherFd, _StreamPointer, Len}}} ->
                % written to a different file
                SizeAcc + Len;
            {_Key, {_Type, Bin}} when is_binary(Bin) ->
                SizeAcc + size(Bin)
            end
        end, 0, Bins),

    {ok, OutputStream} = couch_stream:open(Fd),
    ok = couch_stream:ensure_buffer(OutputStream, PreAllocSize),

    NewBins = lists:map(
        fun({Key, {Type, BinValue}}) ->
            NewBinValue =
            case BinValue of
            {Fd0, StreamPointer, Len} when Fd0 == Fd ->
                % already written to our file, nothing to write
                {Fd, StreamPointer, Len};
            {OtherFd, StreamPointer, Len} ->
                % written to a different file (or a closed file
                % instance, which will cause an error)
                {ok, {NewStreamPointer, Len}, _EndSp} =
                couch_stream:foldl(OtherFd, StreamPointer, Len,
                    fun(Bin, {BeginPointer, SizeAcc}) ->
                        {ok, Pointer} = couch_stream:write(OutputStream, Bin),
                        case SizeAcc of
                        0 -> % this was the first write, record the pointer
                            {ok, {Pointer, size(Bin)}};
                        _ ->
                            {ok, {BeginPointer, SizeAcc + size(Bin)}}
                        end
                    end, {{0,0}, 0}),
                {Fd, NewStreamPointer, Len};
            Bin when is_binary(Bin), size(Bin) > 0 ->
                {ok, StreamPointer} = couch_stream:write(OutputStream, Bin),
                {Fd, StreamPointer, size(Bin)}
            end,
            {Key, {Type, NewBinValue}}
        end, Bins),

    {ok, _FinalPos} = couch_stream:close(OutputStream),

    Doc#doc{attachments = NewBins}.
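% For reference, doc_flush_binaries/2 normalizes every attachment entry to the
% on-file form. A sketch of the shapes involved (names are illustrative):
%
%   before: {Name, {Type, Bin}}                       in-memory binary
%           {Name, {Type, {OtherFd, Sp, Len}}}        stored in another file
%   after:  {Name, {Type, {Fd, StreamPointer, Len}}}  always points into our Fd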
enum_docs_since_reduce_to_count(Reds) ->
    couch_btree:final_reduce(fun btree_by_seq_reduce/2, Reds).

enum_docs_reduce_to_count(Reds) ->
    couch_btree:final_reduce(fun btree_by_id_reduce/2, Reds).

enum_docs_since(MainPid, SinceSeq, Direction, InFun, Ctx) ->
    Db = get_db(MainPid),
    couch_btree:fold(Db#db.docinfo_by_seq_btree, SinceSeq + 1, Direction, InFun, Ctx).

enum_docs_since(MainPid, SinceSeq, InFun, Acc) ->
    enum_docs_since(MainPid, SinceSeq, fwd, InFun, Acc).

enum_docs(MainPid, StartId, Direction, InFun, InAcc) ->
    Db = get_db(MainPid),
    couch_btree:fold(Db#db.fulldocinfo_by_id_btree, StartId, Direction, InFun, InAcc).

enum_docs(MainPid, StartId, InFun, Ctx) ->
    enum_docs(MainPid, StartId, fwd, InFun, Ctx).

% server functions

init(InitArgs) ->
    spawn_link(couch_db, start_update_loop, [self(), InitArgs]),
    receive
    {initialized, Db} ->
        {ok, Db}
    end.

btree_by_seq_split(DocInfo) ->
    #doc_info{
        id = Id,
        rev = Rev,
        update_seq = Seq,
        summary_pointer = Sp,
        conflict_revs = Conflicts,
        deleted_conflict_revs = DelConflicts,
        deleted = Deleted} = DocInfo,
    {Seq, {Id, Rev, Sp, Conflicts, DelConflicts, Deleted}}.

btree_by_seq_join(Seq, {Id, Rev, Sp, Conflicts, DelConflicts, Deleted}) ->
    #doc_info{
        id = Id,
        rev = Rev,
        update_seq = Seq,
        summary_pointer = Sp,
        conflict_revs = Conflicts,
        deleted_conflict_revs = DelConflicts,
        deleted = Deleted}.

btree_by_id_split(#full_doc_info{id=Id, update_seq=Seq,
        deleted=Deleted, rev_tree=Tree}) ->
    {Id, {Seq, case Deleted of true -> 1; false -> 0 end, Tree}}.

btree_by_id_join(Id, {Seq, Deleted, Tree}) ->
    #full_doc_info{id=Id, update_seq=Seq, deleted=Deleted==1, rev_tree=Tree}.

btree_by_id_reduce(reduce, FullDocInfos) ->
    % count the number of non-deleted documents
    length([1 || #full_doc_info{deleted=false} <- FullDocInfos]);
btree_by_id_reduce(combine, Reds) ->
    lists:sum(Reds).

btree_by_seq_reduce(reduce, DocInfos) ->
    % count the number of documents
    length(DocInfos);
btree_by_seq_reduce(combine, Reds) ->
    lists:sum(Reds).

init_db(DbName, Filepath, Fd, Header) ->
    {ok, SummaryStream} = couch_stream:open(Header#db_header.summary_stream_state, Fd),
    ok = couch_stream:set_min_buffer(SummaryStream, 10000),
    {ok, IdBtree} = couch_btree:open(Header#db_header.fulldocinfo_by_id_btree_state, Fd,
        [{split, fun btree_by_id_split/1},
        {join, fun btree_by_id_join/2},
        {reduce, fun btree_by_id_reduce/2}]),
    {ok, SeqBtree} = couch_btree:open(Header#db_header.docinfo_by_seq_btree_state, Fd,
        [{split, fun btree_by_seq_split/1},
        {join, fun btree_by_seq_join/2},
        {reduce, fun btree_by_seq_reduce/2}]),
    {ok, LocalDocsBtree} = couch_btree:open(Header#db_header.local_docs_btree_state, Fd),
    #db{
        update_pid=self(),
        fd=Fd,
        header=Header,
        summary_stream = SummaryStream,
        fulldocinfo_by_id_btree = IdBtree,
        docinfo_by_seq_btree = SeqBtree,
        local_docs_btree = LocalDocsBtree,
        update_seq = Header#db_header.update_seq,
        doc_count = Header#db_header.doc_count,
        doc_del_count = Header#db_header.doc_del_count,
        name = DbName,
        filepath = Filepath
        }.

close_db(#db{fd=Fd,summary_stream=Ss}) ->
    couch_file:close(Fd),
    couch_stream:close(Ss).

terminate(_Reason, Db) ->
    exit(Db#db.update_pid, kill).

handle_call({update_docs, DocActions, Options}, From, #db{update_pid=Updater}=Db) ->
    Updater ! {From, update_docs, DocActions, Options},
    {noreply, Db};
handle_call(get_db, _From, Db) ->
    {reply, {ok, Db}, Db};
handle_call({db_updated, NewDb}, _From, _OldDb) ->
    {reply, ok, NewDb}.

handle_cast(start_compact, #db{update_pid=Updater}=Db) ->
    Updater ! compact,
    {noreply, Db}.

code_change(_OldVsn, State, _Extra) ->
    {ok, State}.
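% The split/join funs above are inverses; a by-id entry round-trips like this
% (the id, seq, and tree values are illustrative):
%
%   btree_by_id_split(#full_doc_info{id="foo", update_seq=7,
%           deleted=false, rev_tree=Tree})
%       => {"foo", {7, 0, Tree}}
%   btree_by_id_join("foo", {7, 0, Tree})
%       => #full_doc_info{id="foo", update_seq=7, deleted=false, rev_tree=Tree}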
handle_info(Msg, Db) ->
    ?LOG_ERROR("Bad message received for db ~s: ~p", [Db#db.name, Msg]),
    exit({error, Msg}).

%%% Internal functions %%%

start_update_loop(MainPid, {DbName, Filepath, Fd, Options}) ->
    link(Fd),

    case lists:member(create, Options) of
    true ->
        % create a new header and write it to the file
        Header = #db_header{},
        ok = couch_file:write_header(Fd, <<$g, $m, $k, 0>>, Header),
        % delete any old compaction files that might be hanging around
        file:delete(Filepath ++ ".compact"),
        file:delete(Filepath ++ ".old");
    false ->
        {ok, Header} = couch_file:read_header(Fd, <<$g, $m, $k, 0>>)
    end,

    Db = init_db(DbName, Filepath, Fd, Header),
    Db2 = Db#db{main_pid=MainPid},
    MainPid ! {initialized, Db2},
    update_loop(Db2).

update_loop(#db{fd=Fd,name=Name,filepath=Filepath,main_pid=MainPid}=Db) ->
    receive
    {OrigFrom, update_docs, DocActions, Options} ->
        case (catch update_docs_int(Db, DocActions, Options)) of
        {ok, Db2} ->
            ok = gen_server:call(MainPid, {db_updated, Db2}),
            gen_server:reply(OrigFrom, ok),
            couch_db_update_notifier:notify({updated, Name}),
            update_loop(Db2);
        retry ->
            gen_server:reply(OrigFrom, retry),
            update_loop(Db);
        conflict ->
            gen_server:reply(OrigFrom, conflict),
            update_loop(Db);
        Error ->
            exit(Error) % we crashed
        end;
    compact ->
        case Db#db.compactor_pid of
        nil ->
            ?LOG_INFO("Starting compaction for db \"~s\"", [Name]),
            Pid = spawn_link(couch_db, start_copy_compact_int, [Db, true]),
            Db2 = Db#db{compactor_pid=Pid},
            ok = gen_server:call(MainPid, {db_updated, Db2}),
            update_loop(Db2);
        _ ->
            % compaction already running
            update_loop(Db)
        end;
    {compact_done, CompactFilepath} ->
        {ok, NewFd} = couch_file:open(CompactFilepath),
        {ok, NewHeader} = couch_file:read_header(NewFd, <<$g, $m, $k, 0>>),
        #db{update_seq=NewSeq} = NewDb =
                init_db(Name, CompactFilepath, NewFd, NewHeader),
        case Db#db.update_seq == NewSeq of
        true ->
            ?LOG_DEBUG("CouchDB swapping files ~s and ~s.",
                    [Filepath, CompactFilepath]),
            ok = file:rename(Filepath, Filepath ++ ".old"),
            ok = file:rename(CompactFilepath, Filepath),

            NewDb2 = NewDb#db{
                main_pid = Db#db.main_pid,
                doc_count = Db#db.doc_count,
                doc_del_count = Db#db.doc_del_count,
                filepath = Filepath},

            couch_stream:close(Db#db.summary_stream),
            % close the file handle asynchronously.
            % wait 5 secs before closing, allowing readers to finish
            unlink(Fd),
            spawn_link(fun() ->
                receive after 5000 -> ok end,
                couch_file:close(Fd),
                file:delete(Filepath ++ ".old")
                end),

            ok = gen_server:call(MainPid, {db_updated, NewDb2}),
            ?LOG_INFO("Compaction for db ~p completed.", [Name]),
            update_loop(NewDb2#db{compactor_pid=nil});
        false ->
            ?LOG_INFO("Compaction file still behind main file "
                    "(update seq=~p. compact update seq=~p). Retrying.",
                    [Db#db.update_seq, NewSeq]),
            Pid = spawn_link(couch_db, start_copy_compact_int, [Db, false]),
            Db2 = Db#db{compactor_pid=Pid},
            update_loop(Db2)
        end;
    Else ->
        ?LOG_ERROR("Unknown message received in db ~s: ~p", [Db#db.name, Else]),
        exit({error, Else})
    end.

get_db(MainPid) ->
    {ok, Db} = gen_server:call(MainPid, get_db),
    Db.
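% Messages understood by the updater process, summarized from update_loop/1:
%
%   {From, update_docs, DocActions, Options}  -> replies ok | retry | conflict
%   compact                                   -> spawns start_copy_compact_int/2
%   {compact_done, CompactFilepath}           -> swaps files if caught up,
%                                                otherwise re-runs the compactor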
open_doc_revs_int(Db, IdRevs, Options) ->
    Ids = [Id || {Id, _Revs} <- IdRevs],
    LookupResults = get_full_doc_infos(Db, Ids),
    lists:zipwith(
        fun({Id, Revs}, Lookup) ->
            case Lookup of
            {ok, #full_doc_info{rev_tree=RevTree}} ->
                {FoundRevs, MissingRevs} =
                case Revs of
                all ->
                    {couch_key_tree:get_all_leafs(RevTree), []};
                _ ->
                    case lists:member(latest, Options) of
                    true ->
                        couch_key_tree:get_key_leafs(RevTree, Revs);
                    false ->
                        couch_key_tree:get(RevTree, Revs)
                    end
                end,
                FoundResults =
                lists:map(fun({Rev, Value, FoundRevPath}) ->
                    case Value of
                    ?REV_MISSING ->
                        % we have the rev in our list but know nothing about it
                        {{not_found, missing}, Rev};
                    {IsDeleted, SummaryPtr} ->
                        {ok, make_doc(Db, Id, IsDeleted, SummaryPtr, FoundRevPath)}
                    end
                end, FoundRevs),
                Results = FoundResults ++ [{{not_found, missing}, MissingRev}
                        || MissingRev <- MissingRevs],
                {ok, Results};
            not_found when Revs == all ->
                {ok, []};
            not_found ->
                {ok, [{{not_found, missing}, Rev} || Rev <- Revs]}
            end
        end, IdRevs, LookupResults).

open_doc_int(Db, ?LOCAL_DOC_PREFIX ++ _ = Id, _Options) ->
    case couch_btree:lookup(Db#db.local_docs_btree, [Id]) of
    [{ok, {_, {Rev, BodyData}}}] ->
        {ok, #doc{id=Id, revs=[integer_to_list(Rev)], body=BodyData}};
    [not_found] ->
        {not_found, missing}
    end;
open_doc_int(Db, #doc_info{id=Id,rev=Rev,deleted=IsDeleted,summary_pointer=Sp}=DocInfo, Options) ->
    Doc = make_doc(Db, Id, IsDeleted, Sp, [Rev]),
    {ok, Doc#doc{meta=doc_meta_info(DocInfo, [], Options)}};
open_doc_int(Db, #full_doc_info{id=Id,rev_tree=RevTree}=FullDocInfo, Options) ->
    #doc_info{deleted=IsDeleted,rev=Rev,summary_pointer=Sp} = DocInfo =
        couch_doc:to_doc_info(FullDocInfo),
    {[{_Rev, _Value, Revs}], []} = couch_key_tree:get(RevTree, [Rev]),
    Doc = make_doc(Db, Id, IsDeleted, Sp, Revs),
    {ok, Doc#doc{meta=doc_meta_info(DocInfo, RevTree, Options)}};
open_doc_int(Db, Id, Options) ->
    case get_full_doc_info(Db, Id) of
    {ok, FullDocInfo} ->
        open_doc_int(Db, FullDocInfo, Options);
    not_found ->
        throw({not_found, missing})
    end.

doc_meta_info(DocInfo, RevTree, Options) ->
    case lists:member(revs_info, Options) of
    false -> [];
    true ->
        {[RevPath],[]} =
            couch_key_tree:get_full_key_paths(RevTree, [DocInfo#doc_info.rev]),
        [{revs_info, lists:map(
            fun({Rev, {true, _Sp}}) ->
                {Rev, deleted};
            ({Rev, {false, _Sp}}) ->
                {Rev, available};
            ({Rev, ?REV_MISSING}) ->
                {Rev, missing}
            end, RevPath)}]
    end
    ++
    case lists:member(conflicts, Options) of
    false -> [];
    true ->
        case DocInfo#doc_info.conflict_revs of
        [] -> [];
        _ -> [{conflicts, DocInfo#doc_info.conflict_revs}]
        end
    end
    ++
    case lists:member(deleted_conflicts, Options) of
    false -> [];
    true ->
        case DocInfo#doc_info.deleted_conflict_revs of
        [] -> [];
        _ -> [{deleted_conflicts, DocInfo#doc_info.deleted_conflict_revs}]
        end
    end.

% rev tree functions

doc_to_tree(Doc) ->
    doc_to_tree(Doc, lists:reverse(Doc#doc.revs)).

doc_to_tree(Doc, [RevId]) ->
    [{RevId, Doc, []}];
doc_to_tree(Doc, [RevId | Rest]) ->
    [{RevId, ?REV_MISSING, doc_to_tree(Doc, Rest)}].

make_doc(Db, Id, Deleted, SummaryPointer, RevisionPath) ->
    {BodyData, BinValues} =
    case SummaryPointer of
    nil ->
        {[], []};
    _ ->
        {ok, {BodyData0, BinValues0}} =
            couch_stream:read_term(Db#db.summary_stream, SummaryPointer),
        {BodyData0,
            [{Name, {Type, {Db#db.fd, Sp, Len}}}
                || {Name, {Type, Sp, Len}} <- BinValues0]}
    end,
    #doc{
        id = Id,
        revs = RevisionPath,
        body = BodyData,
        attachments = BinValues,
        deleted = Deleted
        }.
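% doc_to_tree/1 turns a doc's revision path into a single-branch tree with the
% oldest rev at the root and only the leaf carrying the doc. For example, a
% doc with revs ["3","2","1"] (newest first) becomes:
%
%   [{"1", ?REV_MISSING, [{"2", ?REV_MISSING, [{"3", Doc, []}]}]}]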
flush_trees(_Db, [], AccFlushedTrees) ->
    {ok, lists:reverse(AccFlushedTrees)};
flush_trees(#db{fd=Fd}=Db, [InfoUnflushed | RestUnflushed], AccFlushed) ->
    #full_doc_info{rev_tree=Unflushed} = InfoUnflushed,
    Flushed = couch_key_tree:map(
        fun(_Rev, Value) ->
            case Value of
            #doc{attachments=Atts,deleted=IsDeleted}=Doc ->
                % this node value is actually an unwritten document summary,
                % write it to disk.
                % make sure the Fd in the written bins is the same Fd we are
                % writing to.
                Bins =
                case Atts of
                [] -> [];
                [{_BName, {_Type, {BinFd, _Sp, _Len}}} | _ ] when BinFd == Fd ->
                    % convert bins, removing the FD.
                    % All bins should have been flushed to disk already.
                    [{BinName, {BinType, BinSp, BinLen}}
                        || {BinName, {BinType, {_Fd, BinSp, BinLen}}} <- Atts];
                _ ->
                    % BinFd must not equal our Fd. This can happen when a
                    % database is being updated during a compaction.
                    ?LOG_DEBUG("File where the attachments are written has changed. Possibly retrying.", []),
                    throw(retry)
                end,
                {ok, NewSummaryPointer} =
                    couch_stream:write_term(Db#db.summary_stream, {Doc#doc.body, Bins}),
                {IsDeleted, NewSummaryPointer};
            _ ->
                Value
            end
        end, Unflushed),
    flush_trees(Db, RestUnflushed,
            [InfoUnflushed#full_doc_info{rev_tree=Flushed} | AccFlushed]).

merge_rev_trees(_NoConflicts, [], [], AccNewInfos, AccSeq) ->
    {ok, lists:reverse(AccNewInfos), AccSeq};
merge_rev_trees(NoConflicts, [NewDocs|RestDocsList],
        [OldDocInfo|RestOldInfo], AccNewInfos, AccSeq) ->
    #full_doc_info{id=Id,rev_tree=OldTree} = OldDocInfo,
    UpdatesRevTree = lists:foldl(
        fun(NewDoc, AccTree) ->
            couch_key_tree:merge(AccTree, doc_to_tree(NewDoc))
        end,
        [], NewDocs),
    NewRevTree = couch_key_tree:merge(OldTree, UpdatesRevTree),
    if NewRevTree == OldTree ->
        % nothing changed
        merge_rev_trees(NoConflicts, RestDocsList, RestOldInfo,
                AccNewInfos, AccSeq);
    true ->
        if NoConflicts andalso OldTree /= [] ->
            % a merge that adds a leaf (rather than extending an existing
            % one) introduces a conflict
            OldConflicts = couch_key_tree:count_leafs(OldTree),
            NewConflicts = couch_key_tree:count_leafs(NewRevTree),
            if NewConflicts > OldConflicts ->
                throw(conflict);
            true -> ok
            end;
        true -> ok
        end,
        NewInfo = #full_doc_info{id=Id,update_seq=AccSeq+1,rev_tree=NewRevTree},
        merge_rev_trees(NoConflicts, RestDocsList, RestOldInfo,
                [NewInfo|AccNewInfos], AccSeq+1)
    end.

new_index_entries([], DocCount, DelCount, AccById, AccBySeq) ->
    {ok, DocCount, DelCount, AccById, AccBySeq};
new_index_entries([FullDocInfo|RestInfos], DocCount, DelCount, AccById, AccBySeq) ->
    #doc_info{deleted=Deleted} = DocInfo = couch_doc:to_doc_info(FullDocInfo),
    {DocCount2, DelCount2} =
    if Deleted -> {DocCount, DelCount + 1};
    true -> {DocCount + 1, DelCount}
    end,
    new_index_entries(RestInfos, DocCount2, DelCount2,
        [FullDocInfo#full_doc_info{deleted=Deleted}|AccById],
        [DocInfo|AccBySeq]).

update_docs_int(Db, DocsList, Options) ->
    #db{
        fulldocinfo_by_id_btree = DocInfoByIdBTree,
        docinfo_by_seq_btree = DocInfoBySeqBTree,
        update_seq = LastSeq,
        doc_count = FullDocCount,
        doc_del_count = FullDelCount
        } = Db,

    % separate out the NonRep documents from the rest of the documents
    {DocsList2, NonRepDocs} = lists:foldl(
        fun([#doc{id=Id}=Doc | Rest]=Docs, {DocsListAcc, NonRepDocsAcc}) ->
            case Id of
            ?LOCAL_DOC_PREFIX ++ _ when Rest == [] ->
                % when saving NR (non rep) documents, you can only save a single rev
                {DocsListAcc, [Doc | NonRepDocsAcc]};
            Id ->
                {[Docs | DocsListAcc], NonRepDocsAcc}
            end
        end, {[], []}, DocsList),

    Ids = [Id || [#doc{id=Id}|_] <- DocsList2],

    % look up the existing documents, if they exist.
    OldDocLookups = couch_btree:lookup(DocInfoByIdBTree, Ids),
    OldDocInfos = lists:zipwith(
        fun(_Id, {ok, FullDocInfo}) ->
            FullDocInfo;
        (Id, not_found) ->
            #full_doc_info{id=Id}
        end,
        Ids, OldDocLookups),

    {OldCount, OldDelCount} = lists:foldl(
        fun({ok, FullDocInfo}, {OldCountAcc, OldDelCountAcc}) ->
            case couch_doc:to_doc_info(FullDocInfo) of
            #doc_info{deleted=false} ->
                {OldCountAcc + 1, OldDelCountAcc};
            _ ->
                {OldCountAcc, OldDelCountAcc + 1}
            end;
        (not_found, Acc) ->
            Acc
        end, {0, 0}, OldDocLookups),

    % Merge the new docs into the revision trees.
    NoConflicts = lists:member(new_edits, Options),
    {ok, NewDocInfos, NewSeq} =
        merge_rev_trees(NoConflicts, DocsList2, OldDocInfos, [], LastSeq),

    RemoveSeqs =
        [OldSeq || {ok, #full_doc_info{update_seq=OldSeq}} <- OldDocLookups],

    % All regular documents are now ready to write.

    % Try to write the local documents first; a conflict might be generated
    {ok, Db2} = update_local_docs(Db, NonRepDocs),

    % Write out the document summaries (they are stored in the nodes of the rev trees)
    {ok, FlushedDocInfos} = flush_trees(Db2, NewDocInfos, []),

    {ok, NewDocsCount, NewDelCount, InfoById, InfoBySeq} =
        new_index_entries(FlushedDocInfos, 0, 0, [], []),

    % and the indexes to the documents
    {ok, DocInfoBySeqBTree2} = couch_btree:add_remove(DocInfoBySeqBTree, InfoBySeq, RemoveSeqs),
    {ok, DocInfoByIdBTree2} = couch_btree:add_remove(DocInfoByIdBTree, InfoById, []),

    Db3 = Db2#db{
        fulldocinfo_by_id_btree = DocInfoByIdBTree2,
        docinfo_by_seq_btree = DocInfoBySeqBTree2,
        update_seq = NewSeq,
        doc_count = FullDocCount + NewDocsCount - OldCount,
        doc_del_count = FullDelCount + NewDelCount - OldDelCount},

    case lists:member(delay_commit, Options) of
    true ->
        {ok, Db3};
    false ->
        {ok, commit_data(Db3)}
    end.

update_local_docs(#db{local_docs_btree=Btree}=Db, Docs) ->
    Ids = [Id || #doc{id=Id} <- Docs],
    OldDocLookups = couch_btree:lookup(Btree, Ids),
    BtreeEntries = lists:zipwith(
        fun(#doc{id=Id,deleted=Delete,revs=Revs,body=Body}, OldDocLookup) ->
            NewRev =
            case Revs of
                [] -> 0;
                [RevStr|_] -> list_to_integer(RevStr)
            end,
            OldRev =
            case OldDocLookup of
                {ok, {_, {OldRev0, _}}} -> OldRev0;
                not_found -> 0
            end,
            case OldRev + 1 == NewRev of
            true ->
                case Delete of
                    false -> {update, {Id, {NewRev, Body}}};
                    true  -> {remove, Id}
                end;
            false ->
                throw(conflict)
            end
        end, Docs, OldDocLookups),

    BtreeIdsRemove = [Id || {remove, Id} <- BtreeEntries],
    BtreeIdsUpdate = [ByIdDocInfo || {update, ByIdDocInfo} <- BtreeEntries],

    {ok, Btree2} =
        couch_btree:add_remove(Btree, BtreeIdsUpdate, BtreeIdsRemove),

    {ok, Db#db{local_docs_btree = Btree2}}.

commit_data(#db{fd=Fd, header=Header} = Db) ->
    Header2 = Header#db_header{
        update_seq = Db#db.update_seq,
        summary_stream_state = couch_stream:get_state(Db#db.summary_stream),
        docinfo_by_seq_btree_state = couch_btree:get_state(Db#db.docinfo_by_seq_btree),
        fulldocinfo_by_id_btree_state = couch_btree:get_state(Db#db.fulldocinfo_by_id_btree),
        local_docs_btree_state = couch_btree:get_state(Db#db.local_docs_btree),
        doc_count = Db#db.doc_count,
        doc_del_count = Db#db.doc_del_count
        },
    if Header == Header2 ->
        Db; % unchanged, nothing to do
    true ->
        %ok = couch_file:sync(Fd),
        ok = couch_file:write_header(Fd, <<$g, $m, $k, 0>>, Header2),
        Db#db{header = Header2}
    end.
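% Note on local (non-replicating) docs: update_local_docs/2 uses plain integer
% revisions instead of rev trees. An update must carry exactly OldRev + 1 or
% it throws conflict; e.g. a local doc stored at rev 3 is only replaceable by
% an edit whose revs list starts with "4".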
copy_raw_doc(SrcFd, SrcSp, DestFd, DestStream) ->
    {ok, {BodyData, BinInfos}} = couch_stream:read_term(SrcFd, SrcSp),
    % copy the bin values
    NewBinInfos = lists:map(fun({Name, {Type, BinSp, Len}}) ->
            {ok, NewBinSp} = couch_stream:copy_to_new_stream(SrcFd, BinSp, Len, DestFd),
            {Name, {Type, NewBinSp, Len}}
        end, BinInfos),
    % now write the document summary
    {ok, Sp} = couch_stream:write_term(DestStream, {BodyData, NewBinInfos}),
    Sp.

copy_rev_tree(_SrcFd, _DestFd, _DestStream, []) ->
    [];
copy_rev_tree(SrcFd, DestFd, DestStream, [{RevId, {IsDel, Sp}, []} | RestTree]) ->
    % This is a leaf node, copy it over
    NewSp = copy_raw_doc(SrcFd, Sp, DestFd, DestStream),
    [{RevId, {IsDel, NewSp}, []} | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)];
copy_rev_tree(SrcFd, DestFd, DestStream, [{RevId, _, SubTree} | RestTree]) ->
    % inner node, only copy info/data from leaf nodes
    [{RevId, ?REV_MISSING, copy_rev_tree(SrcFd, DestFd, DestStream, SubTree)}
        | copy_rev_tree(SrcFd, DestFd, DestStream, RestTree)].

copy_docs(#db{fd=SrcFd}=Db, #db{fd=DestFd,summary_stream=DestStream}=NewDb, InfoBySeq) ->
    Ids = [Id || #doc_info{id=Id} <- InfoBySeq],
    LookupResults = couch_btree:lookup(Db#db.fulldocinfo_by_id_btree, Ids),
    NewFullDocInfos = lists:map(
        fun({ok, #full_doc_info{rev_tree=RevTree}=Info}) ->
            Info#full_doc_info{rev_tree=copy_rev_tree(SrcFd, DestFd, DestStream, RevTree)}
        end, LookupResults),
    NewDocInfos = [couch_doc:to_doc_info(FullDocInfo) || FullDocInfo <- NewFullDocInfos],
    {ok, DocInfoBTree} =
        couch_btree:add_remove(NewDb#db.docinfo_by_seq_btree, NewDocInfos, []),
    {ok, FullDocInfoBTree} =
        couch_btree:add_remove(NewDb#db.fulldocinfo_by_id_btree, NewFullDocInfos, []),
    NewDb#db{fulldocinfo_by_id_btree=FullDocInfoBTree,
            docinfo_by_seq_btree=DocInfoBTree}.

copy_compact_docs(Db, NewDb) ->
    EnumBySeqFun =
    fun(#doc_info{update_seq=Seq}=DocInfo, _Offset, {AccNewDb, AccUncopied}) ->
        case couch_util:should_flush() of
        true ->
            NewDb2 = copy_docs(Db, AccNewDb, lists:reverse([DocInfo | AccUncopied])),
            {ok, {commit_data(NewDb2#db{update_seq=Seq}), []}};
        false ->
            {ok, {AccNewDb, [DocInfo | AccUncopied]}}
        end
    end,
    {ok, {NewDb2, Uncopied}} =
        couch_btree:foldl(Db#db.docinfo_by_seq_btree, NewDb#db.update_seq + 1,
            EnumBySeqFun, {NewDb, []}),

    case Uncopied of
    [#doc_info{update_seq=LastSeq} | _] ->
        commit_data(copy_docs(Db, NewDb2#db{update_seq=LastSeq},
            lists:reverse(Uncopied)));
    [] ->
        NewDb2
    end.

start_copy_compact_int(#db{name=Name,filepath=Filepath}=Db, CopyLocal) ->
    CompactFile = Filepath ++ ".compact",
    ?LOG_DEBUG("Compaction process spawned for db \"~s\"", [Name]),
    case couch_file:open(CompactFile) of
    {ok, Fd} ->
        ?LOG_DEBUG("Found existing compaction file for db \"~s\"", [Name]),
        {ok, Header} = couch_file:read_header(Fd, <<$g, $m, $k, 0>>);
    {error, enoent} ->
        {ok, Fd} = couch_file:open(CompactFile, [create]),
        Header = #db_header{},
        ok = couch_file:write_header(Fd, <<$g, $m, $k, 0>>, Header)
    end,
    NewDb = init_db(Name, CompactFile, Fd, Header),
    NewDb2 = copy_compact_docs(Db, NewDb),
    NewDb3 =
    case CopyLocal of
    true ->
        % suck up all the local docs into memory and write them to the new db
        {ok, LocalDocs} = couch_btree:foldl(Db#db.local_docs_btree,
                fun(Value, _Offset, Acc) -> {ok, [Value | Acc]} end, []),
        {ok, NewLocalBtree} = couch_btree:add(NewDb2#db.local_docs_btree, LocalDocs),
        commit_data(NewDb2#db{local_docs_btree=NewLocalBtree});
    _ ->
        NewDb2
    end,
    close_db(NewDb3),

    Db#db.update_pid ! {compact_done, CompactFile}.
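% Compaction note: copy_rev_tree/4 copies summaries and attachment data only
% for leaf revisions; inner nodes are rewritten as ?REV_MISSING. Compaction
% therefore drops the bodies of non-leaf (historical) revisions while
% preserving the shape of the revision tree.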