%% @author Bob Ippolito
%% @copyright 2007 Mochi Media, Inc.

%% @doc Utilities for parsing multipart/form-data.

-module(mochiweb_multipart).
-author('bob@mochimedia.com').

-export([parse_form/1, parse_form/2]).
-export([parse_multipart_request/2]).
-export([test/0]).

%% Upper bound on the number of bytes requested from the socket per read.
-define(CHUNKSIZE, 4096).

%% Parser state threaded through feed_mp/2.
-record(mp, {state, boundary, length, buffer, callback, req}).

%% TODO: DOCUMENT THIS MODULE.

%% @doc Parse a multipart/form-data request using the default file handler,
%%      which collects each uploaded file into
%%      {Filename, ContentType, Binary}.
parse_form(Req) ->
    parse_form(Req, fun default_file_handler/2).

%% @doc Parse a multipart/form-data request into a list of {Name, Value}
%%      pairs, in the order the parts appear in the body.
%%
%%      Plain fields yield a string Value. For parts that carry a
%%      "filename" parameter, FileHandler(Filename, ContentType) is called
%%      and must return a fun; that fun is called with each body chunk
%%      (returning the next fun) and finally with eof, whose result becomes
%%      the Value.
parse_form(Req, FileHandler) ->
    Start = fun (Event) -> parse_form_outer(Event, FileHandler, []) end,
    {_, _, Form} = parse_multipart_request(Req, Start),
    Form.

%% Between parts: either the request is finished (eof) or the headers of the
%% next part arrive. Returns the accumulated form on eof, otherwise the
%% callback that will consume the part's body.
parse_form_outer(eof, _FileHandler, Acc) ->
    lists:reverse(Acc);
parse_form_outer({headers, Headers}, FileHandler, Acc) ->
    {"form-data", Params} = proplists:get_value("content-disposition",
                                                Headers),
    Name = proplists:get_value("name", Params),
    Filename = proplists:get_value("filename", Params),
    part_consumer(Filename, Name, Headers, FileHandler, Acc).

%% Pick the body consumer for a part: no "filename" parameter means a plain
%% field, anything else is handed to the file handler.
part_consumer(undefined, Name, _Headers, FileHandler, Acc) ->
    fun (Event) ->
            parse_form_value(Event, {Name, []}, FileHandler, Acc)
    end;
part_consumer(Filename, Name, Headers, FileHandler, Acc) ->
    ContentType = proplists:get_value("content-type", Headers),
    Handler = FileHandler(Filename, ContentType),
    fun (Event) ->
            parse_form_file(Event, {Name, Handler}, FileHandler, Acc)
    end.

%% Collect the body chunks of a plain field (newest first); on body_end join
%% them into a string and go back to waiting for the next part.
parse_form_value(body_end, {Name, Chunks}, FileHandler, Acc) ->
    Value = binary_to_list(iolist_to_binary(lists:reverse(Chunks))),
    Acc1 = [{Name, Value} | Acc],
    fun (Event) -> parse_form_outer(Event, FileHandler, Acc1) end;
parse_form_value({body, Data}, {Name, Chunks}, FileHandler, Acc) ->
    Chunks1 = [Data | Chunks],
    fun (Event) ->
            parse_form_value(Event, {Name, Chunks1}, FileHandler, Acc)
    end.

%% Feed the body chunks of a file part to its handler; on body_end ask the
%% handler for its final value with eof.
parse_form_file(body_end, {Name, Handler}, FileHandler, Acc) ->
    Value = Handler(eof),
    Acc1 = [{Name, Value} | Acc],
    fun (Event) -> parse_form_outer(Event, FileHandler, Acc1) end;
parse_form_file({body, Data}, {Name, Handler}, FileHandler, Acc) ->
    Handler1 = Handler(Data),
    fun (Event) ->
            parse_form_file(Event, {Name, Handler1}, FileHandler, Acc)
    end.

%% Default FileHandler for parse_form/1: buffers the whole file in memory.
default_file_handler(Filename, ContentType) ->
    default_file_handler_1(Filename, ContentType, []).
%% Accumulating closure behind default_file_handler/2. Each call with a data
%% chunk returns a new closure holding that chunk; a call with eof returns
%% {Filename, ContentType, Binary} with the chunks joined in arrival order.
default_file_handler_1(Filename, ContentType, Acc) ->
    fun (eof) ->
            Value = iolist_to_binary(lists:reverse(Acc)),
            {Filename, ContentType, Value};
        (Next) ->
            default_file_handler_1(Filename, ContentType, [Next | Acc])
    end.

%% @doc Stream-parse a multipart/form-data request body.
%%
%%      Callback is a fun of one argument. It is called with
%%      {headers, Headers}, {body, Binary}, body_end and finally eof, and
%%      each call except the last must return the fun to use for the next
%%      event. The result is {RemainingLength, RestOfBuffer, EofResult},
%%      where EofResult is whatever the callback returned for eof.
%%
%%      Crashes (badmatch) if the body does not start with the boundary
%%      announced in the content-type header.
parse_multipart_request(Req, Callback) ->
    %% TODO: Support chunked?
    Length = list_to_integer(Req:get_header_value("content-length")),
    Boundary = iolist_to_binary(
                 get_boundary(Req:get_header_value("content-type"))),
    %% Every boundary after the first is preceded by CRLF.
    Prefix = <<"\r\n--", Boundary/binary>>,
    BS = byte_size(Boundary),
    Chunk = read_chunk(Req, Length),
    Length1 = Length - byte_size(Chunk),
    <<"--", Boundary:BS/binary, "\r\n", Rest/binary>> = Chunk,
    feed_mp(headers, #mp{boundary=Prefix,
                         length=Length1,
                         buffer=Rest,
                         callback=Callback,
                         req=Req}).

%% Parse a CRLF-separated header block into a list of
%% {LowerCaseName, ParsedValue} pairs, in order of appearance.
parse_headers(<<>>) ->
    [];
parse_headers(Binary) ->
    parse_headers(Binary, []).

parse_headers(Binary, Acc) ->
    case find_in_binary(<<"\r\n">>, Binary) of
        {exact, N} ->
            %% Cut off one header line and drop its CRLF terminator.
            <<Line:N/binary, "\r\n", Rest/binary>> = Binary,
            parse_headers(Rest, [split_header(Line) | Acc]);
        not_found ->
            %% The last header line has no trailing CRLF.
            lists:reverse([split_header(Binary) | Acc])
    end.

%% Split "Name: value" at the first colon. Crashes (badmatch) if the line
%% contains no colon.
split_header(Line) ->
    {Name, [$: | Value]} = lists:splitwith(fun (C) -> C =/= $: end,
                                           binary_to_list(Line)),
    {mochiweb_util:to_lower(string:strip(Name)),
     mochiweb_util:parse_header(Value)}.

%% Read the next piece of the body: at most ?CHUNKSIZE bytes and never more
%% than the Length bytes still outstanding. Deliberately has no clause for
%% Length =< 0, so reading past the declared content-length crashes.
read_chunk(Req, Length) when Length > 0 ->
    Req:recv(erlang:min(Length, ?CHUNKSIZE)).

%% Append one more chunk from the socket to the parse buffer and decrease
%% the outstanding length accordingly.
read_more(State=#mp{length=Length, buffer=Buffer, req=Req}) ->
    Data = read_chunk(Req, Length),
    Buffer1 = <<Buffer/binary, Data/binary>>,
    State#mp{length=Length - byte_size(Data),
             buffer=Buffer1}.
%% Core state machine. In the `headers' state it extracts and reports one
%% part's header block; in the `body' state it reports body data until a
%% boundary is found. Returns {RemainingLength, RestOfBuffer, EofResult}
%% once the closing boundary has been consumed.
feed_mp(headers, State=#mp{buffer=Buffer, callback=Callback}) ->
    {State1, P} = case find_in_binary(<<"\r\n\r\n">>, Buffer) of
                      {exact, N} ->
                          {State, N};
                      _ ->
                          S1 = read_more(State),
                          %% Assume headers must be less than ?CHUNKSIZE
                          {exact, N} = find_in_binary(<<"\r\n\r\n">>,
                                                      S1#mp.buffer),
                          {S1, N}
                  end,
    %% P is the offset of the blank line that ends the header block.
    <<Headers:P/binary, "\r\n\r\n", Rest/binary>> = State1#mp.buffer,
    NextCallback = Callback({headers, parse_headers(Headers)}),
    feed_mp(body, State1#mp{buffer=Rest,
                            callback=NextCallback});
feed_mp(body, State=#mp{boundary=Prefix, buffer=Buffer, callback=Callback}) ->
    case find_boundary(Prefix, Buffer) of
        {end_boundary, Start, Skip} ->
            %% Closing boundary: emit the final data, body_end and eof.
            <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
            C1 = Callback({body, Data}),
            C2 = C1(body_end),
            {State#mp.length, Rest, C2(eof)};
        {next_boundary, Start, Skip} ->
            %% Another part follows: finish this one, then parse headers.
            <<Data:Start/binary, _:Skip/binary, Rest/binary>> = Buffer,
            C1 = Callback({body, Data}),
            feed_mp(headers, State#mp{callback=C1(body_end),
                                      buffer=Rest});
        {'maybe', Start} ->
            %% A possible boundary begins at Start but the buffer ends too
            %% early to tell: emit what precedes it, keep the tail and read
            %% more. (`maybe' is quoted because it is a reserved word when
            %% the maybe_expr feature is enabled.)
            <<Data:Start/binary, Rest/binary>> = Buffer,
            feed_mp(body, read_more(State#mp{callback=Callback({body, Data}),
                                             buffer=Rest}));
        not_found ->
            %% No boundary in sight: the whole buffer is body data.
            {Data, Rest} = {Buffer, <<>>},
            feed_mp(body, read_more(State#mp{callback=Callback({body, Data}),
                                             buffer=Rest}))
    end.

%% Extract the boundary string from a Content-Type header value. Crashes if
%% the media type is not multipart/form-data or no boundary is present.
get_boundary(ContentType) ->
    {"multipart/form-data", Opts} = mochiweb_util:parse_header(ContentType),
    case proplists:get_value("boundary", Opts) of
        S when is_list(S) ->
            S
    end.

%% Search Data for the non-empty binary B. Returns:
%%   {exact, N}      - B occurs in full, first at byte offset N;
%%   {partial, N, K} - no full match, but the last K bytes of Data, starting
%%                     at offset N, equal the first K bytes of B;
%%   not_found       - neither of the above.
find_in_binary(B, Data) when size(B) > 0 ->
    case byte_size(Data) - byte_size(B) of
        Last when Last < 0 ->
            %% Data is shorter than B: only a partial match is possible.
            partial_find(B, Data, 0, byte_size(Data));
        Last ->
            find_in_binary(B, byte_size(B), Data, 0, Last)
    end.

%% Try a full match at every offset N up to Last, then fall back to looking
%% for a prefix of B at the very end of D.
find_in_binary(B, BS, D, N, Last) when N =< Last ->
    case D of
        <<_:N/binary, B:BS/binary, _/binary>> ->
            {exact, N};
        _ ->
            find_in_binary(B, BS, D, 1 + N, Last)
    end;
find_in_binary(B, BS, D, N, Last) when N =:= 1 + Last ->
    partial_find(B, D, N, BS - 1).

%% Check whether D ends, at offset N, with the first K bytes of B; if not,
%% retry one byte further along with a prefix one byte shorter.
partial_find(_B, _D, _N, 0) ->
    not_found;
partial_find(B, D, N, K) ->
    <<B1:K/binary, _/binary>> = B,
    case D of
        <<_Skip:N/binary, B1:K/binary>> ->
            {partial, N, K};
        _ ->
            partial_find(B, D, 1 + N, K - 1)
    end.
%% Locate a multipart boundary in Data. Prefix is the boundary marker
%% including its leading CRLF and dashes. Returns:
%%   {next_boundary, Start, Skip} - Prefix at offset Start followed by CRLF;
%%                                  Skip is the length of the boundary line;
%%   {end_boundary, Start, Skip}  - Prefix at offset Start followed by
%%                                  "--" CRLF (closing boundary);
%%   {'maybe', Start}             - Data ends before the match starting at
%%                                  Start can be confirmed or rejected;
%%   not_found                    - no boundary, or the Prefix found was
%%                                  followed by something else.
%% The atom is written 'maybe' because `maybe' is a reserved word when the
%% maybe_expr feature is enabled; the value is the same atom either way.
find_boundary(Prefix, Data) ->
    case find_in_binary(Prefix, Data) of
        {exact, Skip} ->
            PrefixSkip = Skip + byte_size(Prefix),
            case Data of
                <<_:PrefixSkip/binary, "\r\n", _/binary>> ->
                    {next_boundary, Skip, byte_size(Prefix) + 2};
                <<_:PrefixSkip/binary, "--\r\n", _/binary>> ->
                    {end_boundary, Skip, byte_size(Prefix) + 4};
                _ when size(Data) < PrefixSkip + 4 ->
                    %% Underflow
                    {'maybe', Skip};
                _ ->
                    %% False positive
                    not_found
            end;
        {partial, Skip, Length} when (Skip + Length) =:= size(Data) ->
            %% Underflow
            {'maybe', Skip};
        _ ->
            not_found
    end.

%% Test helper: start a local socket server running ServerFun on an
%% ephemeral port, connect to it and run ClientFun on the client socket.
%% Returns ClientFun's result. The client socket is always closed and the
%% server always stopped, even if ClientFun raises; the exception then
%% propagates to the caller.
with_socket_server(ServerFun, ClientFun) ->
    {ok, Server} = mochiweb_socket_server:start([{ip, "127.0.0.1"},
                                                 {port, 0},
                                                 {loop, ServerFun}]),
    try
        Port = mochiweb_socket_server:get(Server, port),
        {ok, Client} = gen_tcp:connect("127.0.0.1", Port,
                                       [binary, {active, false}]),
        try
            ClientFun(Client)
        after
            gen_tcp:close(Client)
        end
    after
        mochiweb_socket_server:stop(Server)
    end.

%% Test helper: build a POST request on Socket carrying only the
%% content-type and content-length headers.
fake_request(Socket, ContentType, Length) ->
    mochiweb_request:new(Socket,
                         'POST',
                         "/multipart",
                         {1,1},
                         mochiweb_headers:make(
                           [{"content-type", ContentType},
                            {"content-length", Length}])).

%% Test helper: a parser callback that asserts events arrive exactly in the
%% expected order. Crashes (function_clause) on the first event that differs
%% from the head of the list; returns ok once the list is exhausted.
test_callback(Expect, [Expect | Rest]) ->
    case Rest of
        [] ->
            ok;
        _ ->
            fun (Next) -> test_callback(Next, Rest) end
    end.
%% End-to-end check of parse_multipart_request/2 with a long dashed
%% boundary: one plain field and one text file whose content contains bare
%% newlines.
test_parse3() ->
    CType = "multipart/form-data; boundary=---------------------------7386909285754635891697677882",
    Payload = <<"-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------7386909285754635891697677882\r\nContent-Disposition: form-data; name=\"file\"; filename=\"test_file.txt\"\r\nContent-Type: text/plain\r\n\r\nWoo multiline text file\n\nLa la la\r\n-----------------------------7386909285754635891697677882--\r\n">>,
    %% Events the parser must deliver, in order.
    Events = [{headers,
               [{"content-disposition",
                 {"form-data", [{"name", "hidden"}]}}]},
              {body, <<"multipart message">>},
              body_end,
              {headers,
               [{"content-disposition",
                 {"form-data",
                  [{"name", "file"}, {"filename", "test_file.txt"}]}},
                {"content-type", {"text/plain", []}}]},
              {body, <<"Woo multiline text file\n\nLa la la">>},
              body_end,
              eof],
    Checker = fun (Event) -> test_callback(Event, Events) end,
    %% Server side: push the whole payload, then terminate.
    Serve = fun (Socket) ->
                    case gen_tcp:send(Socket, Payload) of
                        ok ->
                            exit(normal)
                    end
            end,
    %% Client side: parse it and require that nothing is left over.
    Run = fun (Socket) ->
                  Req = fake_request(Socket, CType, byte_size(Payload)),
                  {0, <<>>, ok} = parse_multipart_request(Req, Checker),
                  ok
          end,
    ok = with_socket_server(Serve, Run),
    ok.
%% End-to-end check of parse_multipart_request/2 where the file part has an
%% empty filename and an empty body.
test_parse2() ->
    ContentType = "multipart/form-data; boundary=---------------------------6072231407570234361599764024",
    BinContent = <<"-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"hidden\"\r\n\r\nmultipart message\r\n-----------------------------6072231407570234361599764024\r\nContent-Disposition: form-data; name=\"file\"; filename=\"\"\r\nContent-Type: application/octet-stream\r\n\r\n\r\n-----------------------------6072231407570234361599764024--\r\n">>,
    Expect = [{headers,
               [{"content-disposition",
                 {"form-data", [{"name", "hidden"}]}}]},
              {body, <<"multipart message">>},
              body_end,
              {headers,
               [{"content-disposition",
                 {"form-data", [{"name", "file"}, {"filename", ""}]}},
                {"content-type", {"application/octet-stream", []}}]},
              {body, <<>>},
              body_end,
              eof],
    TestCallback = fun (Next) -> test_callback(Next, Expect) end,
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        Res = parse_multipart_request(Req, TestCallback),
                        {0, <<>>, ok} = Res,
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.

%% End-to-end check of parse_form/1 with one plain field and one file.
test_parse_form() ->
    ContentType = "multipart/form-data; boundary=AaB03x",
    "AaB03x" = get_boundary(ContentType),
    Content = mochiweb_util:join(
                ["--AaB03x",
                 "Content-Disposition: form-data; name=\"submit-name\"",
                 "",
                 "Larry",
                 "--AaB03x",
                 "Content-Disposition: form-data; name=\"files\";"
                 ++ "filename=\"file1.txt\"",
                 "Content-Type: text/plain",
                 "",
                 "... contents of file1.txt ...",
                 "--AaB03x--",
                 ""], "\r\n"),
    BinContent = iolist_to_binary(Content),
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        Res = parse_form(Req),
                        %% The expected file content must be a single-line
                        %% literal identical to the one sent above.
                        [{"submit-name", "Larry"},
                         {"files", {"file1.txt", {"text/plain",[]},
                                    <<"... contents of file1.txt ...">>}
                         }] = Res,
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.

%% End-to-end check of parse_multipart_request/2 on the same payload as
%% test_parse_form/0, asserting the exact event sequence.
test_parse() ->
    ContentType = "multipart/form-data; boundary=AaB03x",
    "AaB03x" = get_boundary(ContentType),
    Content = mochiweb_util:join(
                ["--AaB03x",
                 "Content-Disposition: form-data; name=\"submit-name\"",
                 "",
                 "Larry",
                 "--AaB03x",
                 "Content-Disposition: form-data; name=\"files\";"
                 ++ "filename=\"file1.txt\"",
                 "Content-Type: text/plain",
                 "",
                 "... contents of file1.txt ...",
                 "--AaB03x--",
                 ""], "\r\n"),
    BinContent = iolist_to_binary(Content),
    Expect = [{headers,
               [{"content-disposition",
                 {"form-data", [{"name", "submit-name"}]}}]},
              {body, <<"Larry">>},
              body_end,
              {headers,
               [{"content-disposition",
                 {"form-data", [{"name", "files"}, {"filename", "file1.txt"}]}},
                {"content-type", {"text/plain", []}}]},
              {body, <<"... contents of file1.txt ...">>},
              body_end,
              eof],
    TestCallback = fun (Next) -> test_callback(Next, Expect) end,
    ServerFun = fun (Socket) ->
                        case gen_tcp:send(Socket, BinContent) of
                            ok ->
                                exit(normal)
                        end
                end,
    ClientFun = fun (Socket) ->
                        Req = fake_request(Socket, ContentType,
                                           byte_size(BinContent)),
                        Res = parse_multipart_request(Req, TestCallback),
                        {0, <<>>, ok} = Res,
                        ok
                end,
    ok = with_socket_server(ServerFun, ClientFun),
    ok.
%% Unit checks for find_boundary/2: next/end boundaries at offsets 0 and 1,
%% a missing leading CRLF, and buffers that end before the boundary can be
%% confirmed. The atom is written 'maybe' because `maybe' is a reserved word
%% when the maybe_expr feature is enabled.
test_find_boundary() ->
    B = <<"\r\n--X">>,
    {next_boundary, 0, 7} = find_boundary(B, <<"\r\n--X\r\nRest">>),
    {next_boundary, 1, 7} = find_boundary(B, <<"!\r\n--X\r\nRest">>),
    {end_boundary, 0, 9} = find_boundary(B, <<"\r\n--X--\r\nRest">>),
    {end_boundary, 1, 9} = find_boundary(B, <<"!\r\n--X--\r\nRest">>),
    not_found = find_boundary(B, <<"--X\r\nRest">>),
    {'maybe', 0} = find_boundary(B, <<"\r\n--X\r">>),
    {'maybe', 1} = find_boundary(B, <<"!\r\n--X\r">>),
    P = <<"\r\n-----------------------------16037454351082272548568224146">>,
    %% Buffer that ends part-way through the boundary P, which starts at
    %% byte offset 30.
    B0 = <<55,212,131,77,206,23,216,198,35,87,252,118,252,8,25,211,132,229,
          182,42,29,188,62,175,247,243,4,4,0,59,
          13,10,45,45,45,45,45,45,45,
          45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,45,
          49,54,48,51,55,52,53,52,51,53,49>>,
    {'maybe', 30} = find_boundary(P, B0),
    ok.

%% Unit checks for find_in_binary/2: exact matches at the start, middle and
%% end, a miss, and partial matches at the tail of the data.
test_find_in_binary() ->
    {exact, 0} = find_in_binary(<<"foo">>, <<"foobarbaz">>),
    {exact, 1} = find_in_binary(<<"oo">>, <<"foobarbaz">>),
    {exact, 8} = find_in_binary(<<"z">>, <<"foobarbaz">>),
    not_found = find_in_binary(<<"q">>, <<"foobarbaz">>),
    {partial, 7, 2} = find_in_binary(<<"azul">>, <<"foobarbaz">>),
    {exact, 0} = find_in_binary(<<"foobarbaz">>, <<"foobarbaz">>),
    {partial, 0, 3} = find_in_binary(<<"foobar">>, <<"foo">>),
    {partial, 1, 3} = find_in_binary(<<"foobar">>, <<"afoo">>),
    ok.

%% @doc Run all self-tests of this module. Returns ok, or crashes on the
%%      first failing assertion.
test() ->
    test_find_in_binary(),
    test_find_boundary(),
    test_parse(),
    test_parse2(),
    test_parse3(),
    test_parse_form(),
    ok.