MetaFor: Hinweise zur Implementierung

MetaFor besteht aus einem Frontend und einem Backend. Dazwischen vermittelt ein Modul, das gleichzeitig einen sehr einfachen Cache implementiert.

Backend

Das Backend ist in Haskell geschrieben. Wer ein Modul für ein neues Forum beisteuern möchte, kann sich melden. Webmaster, die bei der Aufnahme ihres eigenen Forums behilflich sein möchten, könnten die Daten in einem leicht zu parsenden Format, zum Beispiel in XML, anbieten.

Frontend

Das Frontend ist ein sehr einfaches Yaws-Skript, also HTML gemischt mit Erlang. Hier ist der aktuelle Quelltext:

<erl>

%% Gatekeeper run before the page body is emitted: requests whose
%% User-Agent header starts with "Convera" are answered with a plain-text
%% 403 and processing stops (`break'); every other request yields `ok'.
out(A) ->
    case A of
        #arg{headers = #headers{user_agent = "Convera" ++ _}} ->
            Status = {status, 403},
            Body = {content, "text/plain", "You are stupid.  Go away."},
            [Status, Body, break];
        _ ->
            ok
    end.

</erl><!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN"
            "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
  <meta http-equiv="Content-Type" content="text/html; charset=iso-8859-1">
  <meta http-equiv="Content-Script-Type" content="text/javascript">  
  <title>MetaFor</title>
  <link rel="alternate stylesheet" type="text/css" 
        href="http://www.fu-mathe-team.de/fumt-css.yaws"
        title="FU Mathe Team">
  <link rel="stylesheet" type="text/css" 
        href="http://laufen-in-wuppertal.de/drsl/dat/drsl.css"
        title="drsl">
  <link rel="icon" href="http://www.laufen-in-wuppertal.de/drsl/drsl.ico" 
        type="image/x-icon">
  <style type="text/css">
      table{border-collapse:collapse; margin:3px}
      td,th{border:1px solid #999; padding:0.2ex 0.3em;
	    vertical-align:middle}
      img{border:0}
      .even {}
      .odd {background-color:#e8f0ff}
  </style>
</head>

<body>

<script type="text/javascript"><!--
// Open (or reuse) the browser window named 'mf-ext' and bring it to the
// front.
function openext() {
  // Declared locally so the handle does not leak into the global scope.
  var w = window.open('about:blank', 'mf-ext');
  // window.open returns null when the browser refuses to open the window
  // (e.g. a popup blocker); only focus a window that really exists.
  if (w) {
    w.focus();
  }
}
// -->
</script>

<h1 style="text-align:center">
<img src="http://www.laufzentrale.de/bilder/metafor.gif" alt="MetaFor"
     width=170 height=104>
</h1>

<p><b>Da dieser Service schon lange nicht mehr gepflegt wurde und
kaum noch funktioniert hat, wurde er eingestellt.</b>

<p> Die unten aufgef&uuml;hrten Beitr&auml;ge sind in den genannten
Foren und Newsgruppen erschienen, zu denen wir in keinerlei Beziehung
stehen.  Die Verweise f&uuml;hren dorthin.

<p>
Wir haben jetzt auch einen <a href="metafor-rss.yaws">RSS-Feed</a>.  Da er die
Texte der Artikel nicht enthält, ist er nur von eingeschränkter Nützlichkeit,
eignet sich aber zum Beispiel für die Live&nbsp;Bookmarks von <a
target="_top"
href="http://www.mozilla.org/products/firefox/">Firefox&nbsp;1.0</a>.  Teilt
uns Eure Meinung mit!

<p><a href="metafor-source.yaws"
>Hinweise zur Implementierung</a>
<p>
Kleine Bemerkung:  Die externen Seiten werden prinzipiell bei jedem Aufruf 
neu geladen, höchstens aber alle drei Minuten.
<hr>

<erl>

%% Record shapes of the data returned by metafor:get_threads/1, as they are
%% used by the rendering functions in this script.
%%
%% forum:   `long' is the link text and `short' the abbreviation shown in the
%%          forum table; `url' is the link target; `pic' is an icon file name
%%          looked up under "icons/"; `hpurl', when set, is preferred over
%%          `url' for the icon link; `fid' is the id that is compared with
%%          the user's selection.
-record(forum, {short, long, url, pic, hpurl, fid}).
%% article: `date' is rendered by f_date/1, i.e. {{Y,M,D},{H,Min,S}} or
%%          undefined; every field may be undefined.
-record(article, {author, subject, date, url}).
%% thread:  `first' is an #article{}; `last' is an #article{} or undefined;
%%          `nanswers' and `vid' are integers or undefined; `tid' is only
%%          referenced from commented-out code in format_thread/2.
-record(thread, {first, last, url, nanswers, tid, vid}).


%% Name of the cookie that stores the user's forum selection.
-define(sel_cookie, "metaforlist").
%% Forum ids used when no usable selection cookie is present.
-define(default_selection, ["drslv", "drslm", 
			    "LA25", "LA23", "LA19",
			    "BM21", "BM3",
			    "Tip7","Tip8",
			    "LT67211", "LT67219"]).

%% Work out the forum selection for this request.
%% Returns {Out, Selection}: for a POST both come from the submitted form
%% (see get_sel_from_post/1); for any other method Out is [] and the
%% selection is read from the cookie.
get_selection(A) ->
    Request = A#arg.req,
    Method = Request#http_request.method,
    case Method of
        'POST' -> get_sel_from_post(A);
        _Other -> {[], get_sel_from_cookie(A)}
    end.

%% Read the forum selection from the posted form.
%% Returns {Out, List}: List holds the values of all "check" fields; Out is
%% a set-cookie instruction storing that list when the form also contained
%% a "save" field, and [] otherwise.
%% NOTE(review): the cookie expiry date is a fixed timestamp in 2010.
get_sel_from_post(A) ->
    PostVars = yaws_api:parse_post(A),
    {Save, List} = lists:foldl(fun acc_list/2, {false, []}, PostVars),
    CookieOut =
        case Save of
            true ->
                Value = encode_sel_cookie(List),
                yaws_api:setcookie(?sel_cookie, Value, "/",
                                   "Fri, 01-Jan-2010 00:00:00 GMT");
            false ->
                []
        end,
    {CookieOut, List}.

%% Fold step over the posted form fields.  The accumulator is
%% {SaveRequested, CheckedValues}: a "save" field sets the flag, a "check"
%% field prepends its value, any other field leaves the accumulator as is.
acc_list({"save", _Value}, {_OldFlag, Checked}) ->
    {true, Checked};
acc_list({"check", Value}, {SaveFlag, Checked}) ->
    {SaveFlag, [Value | Checked]};
acc_list(_OtherField, Acc) ->
    Acc.

%% Read the forum selection from the selection cookie.
%% A cookie value of the form ":id1:id2..." is decoded; a value containing a
%% space is treated as the old cookie format.  The old format, a missing
%% cookie or any other value all yield the default selection.
get_sel_from_cookie(A) ->
    Headers = A#arg.headers,
    Raw = yaws_api:find_cookie_val(?sel_cookie, Headers#headers.cookie),
    case Raw of
        [$: | Body] ->
            OldFormat = lists:member($\s, Body),
            if
                OldFormat -> ?default_selection;
                true -> decode_sel_cookie(Body)
            end;
        _NoCookie ->
            ?default_selection
    end.

%% Split a cookie body such as "a:b:c" at every $: into its fields.
%% The fields come back in reverse order (["c","b","a"]); empty fields are
%% kept, so the empty string decodes to [[]].
decode_sel_cookie(C) ->
    decode_sel_cookie(C, []).

%% Cut off one field at a time and push it onto the result.
decode_sel_cookie(Rest, Fields) ->
    case lists:splitwith(fun(Ch) -> Ch =/= $: end, Rest) of
        {Field, []} ->
            [Field | Fields];
        {Field, [$: | More]} ->
            decode_sel_cookie(More, [Field | Fields])
    end.

%% Build the cookie value for a list of forum ids: every id is prefixed
%% with $: and the pieces are concatenated, e.g. ["a","bc"] -> ":a:bc".
encode_sel_cookie(Ids) ->
    lists:flatmap(fun(Id) -> [$: | Id] end, Ids).


%% Entry point of this <erl> section.
%% The service has been discontinued: the first clause matches every
%% request and produces no output, so the two clauses below it can never be
%% selected.  They show what the page did while it was live.
out(A) -> [];   % No more MetaFor

%% (unreachable) Refuse user agents whose name starts with "Convera".
out(A=#arg{headers=#headers{user_agent="Convera"++_}}) ->
    [{status, 403},
     {content, "text/plain", "You are stupid.  Go away."},
     break];

%% (unreachable) Render the page: allow caching for 180 seconds, mark the
%% response as varying with the Cookie header, emit the optional set-cookie
%% output and then the thread and forum tables.
out(A) ->
    {Out, Selection} = get_selection(A),
    [{header, {cache_control, "max-age=180"}},
     {header, {"Vary", "Cookie"}},
     Out, 
     main(A, Selection)].

%% Fetch the threads of the selected forums from the metafor server and
%% render them.
main(A, Selection) ->
    success(A, metafor:get_threads(Selection), Selection).

%% ehtml fragment announcing that an error occurred, followed by a rule.
error() ->
    Message = "Leider ist ein Fehler aufgetreten",
    {ehtml, [{p}, Message, {hr}]}.

%% Placeholder element rendered wherever a value is not available.
na() ->
    Text = "n/a",
    {em, [], Text}.

%% Format a string field: `undefined' becomes the "n/a" placeholder, any
%% other value is wrapped as {pre_html, Value}.
f_string(S) ->
    case S of
        undefined -> na();
        _ -> {pre_html, S}
    end.

%% Format an integer field: `undefined' becomes the "n/a" placeholder,
%% anything else is converted with integer_to_list/1.
f_integer(I) ->
    case I of
        undefined -> na();
        _ -> integer_to_list(I)
    end.

%% Format a {{Y,M,D},{H,Min,S}} timestamp as "D.M.YY H:MM" (two-digit year
%% and minute, seconds dropped).  `undefined' becomes the "n/a" placeholder.
%% The result is a list of strings and characters, not a flat string.
f_date(undefined) ->
    na();
f_date({{Year, Month, Day}, {Hour, Minute, _Second}}) ->
    DatePart = [integer_to_list(Day), $., integer_to_list(Month), $.,
                two_d(Year)],
    TimePart = [integer_to_list(Hour), $:, two_d(Minute)],
    DatePart ++ [$\s | TimePart].

%% The last two decimal digits of N as a two-character string,
%% e.g. 5 -> "05", 1999 -> "99".
two_d(N) ->
    Tens = (N div 10) rem 10,
    Ones = N rem 10,
    [$0 + Tens, $0 + Ones].

%% Format a link: an `undefined' URL becomes the "n/a" placeholder,
%% otherwise Text is wrapped in an external link to URL.
f_url(URL, Text) ->
    case URL of
        undefined -> na();
        _ -> foreign_a(URL, Text)
    end.

%% The part of a file name before its first dot ("star.gif" -> "star");
%% a name without a dot is returned unchanged.
base_name(Cs) ->
    lists:takewhile(fun(Ch) -> Ch =/= $. end, Cs).


%% Render one thread as ehtml table rows.
%%   N      - zero-based row number, used only to pick the row class.
%%   {T, F} - the #thread{} and the #forum{} it belongs to.
%% Returns a two-element list: the main {tr, ...} and either [] or a list
%% with a second {tr, ...} that carries the subject of the last article.
%% The cells follow the header built in threads/2: forum icon, forum name,
%% last post, subject, first post, number of answers, event (VID).
format_thread(N, {T,F}) ->
    First = T#thread.first,
    Last = T#thread.last,
    %% A second row is emitted only when the last article has a URL or a
    %% subject.
    DoLastSubject = case Last of
			#article{url=LU,subject=LS} 
			-> (LU =/= undefined) or (LS =/= undefined);
			undefined -> false
		    end,
    %% URL of the thread, falling back to the URL of its first article.
    %% Only used for the "assign to an event" link below.
    ThreadURL = case T#thread.url of
		    undefined ->
			First#article.url;
		    TURL -> TURL
		end,
    %% Article shown in the "last post" column: the last article, or the
    %% first one when the thread has no last article.
    LastArt = case Last of
		  A=#article{} -> A;
		  undefined -> T#thread.first
	      end,
    %% S decorates a cell's attribute list: cells span both rows when the
    %% second row is emitted.
    S = case DoLastSubject of 
	    false -> fun (X) -> X end;
	    true -> fun(X) -> [{rowspan,2}|X] end
	end,
    RowClass = row_class(N),
    [{tr, [RowClass],
      %% Forum icon, linked to the forum home page (hpurl) when there is
      %% one and to the forum URL otherwise; empty when the forum has no
      %% picture.
      [{td, S([]), case F#forum.pic of
		       undefined -> [];
		       Pic -> foreign_a(case F#forum.hpurl of
					    undefined -> F#forum.url;
					    URL -> URL
					end,
					{img, [{src,["icons/",Pic]},
					       {alt, base_name(Pic)},
					       {width, 28}, {height,28}]})
		   end},
       %% Short forum name, linked to the forum.
       {td, S([]), foreign_a(F#forum.url, 
			     f_string(F#forum.short))},
       %% Date and author of the most recent article.
       {td, S([]),
	[f_date(LastArt#article.date),{br},f_string(LastArt#article.author)]},
       %% Subject of the first article, linked to that article or, when it
       %% has no URL, to the thread.  This cell never spans two rows: the
       %% second row sits directly below it.
       {td, [], foreign_a(case First#article.url of
			      undefined ->
				  T#thread.url;
			      U -> U
			  end,
			  F#forum.url,
			  f_string(First#article.subject))},
       %% Date and author of the first article; "n/a" when there is no
       %% last article (the first article is then already shown in the
       %% "last post" column).
       case Last of
	   undefined ->
	       {td, S([{align, center}]), na()};
	   _ ->
	       {td, S([]), [f_date(First#article.date),
			    {br},
			    f_string(First#article.author)]}
       end,
       %% Number of answers, linked to the thread.
       {td, S([]), foreign_a(T#thread.url, f_integer(T#thread.nanswers))},
       %% Event column: without a vid, a pen icon linking to put_vid.php
       %% with the thread name and URL as parameters; with a vid, a star
       %% icon linking to that event.
       {td, S([]), case T#thread.vid of
		       undefined -> 
			   {a, [{href, 
				 ["http://www.lg-w.de/events"
				  "/put_vid.php?"
				  %% "tid=",
				  %% yaws_api:url_encode(F#forum.fid), $:,
				  %% yaws_api:url_encode(T#thread.tid), 
				  %% "&amp;"
				  "tname=",
				  yaws_api:url_encode(First#article.subject),
				  "&amp;url=",
				  yaws_api:url_encode(
				    olzo_url(ThreadURL))]}],
			    {img, [{src, "/icons/pen.gif"},
				   {alt, "neu"},
				   {title, "einer Veranstaltung zuordnen"},
				   {width, 18}, {height,15}]
			    }};
		       VID -> 
			   {a, [{href, ["http://www.lg-w.de/events"
					"/events.php?vid=", 
					integer_to_list(VID)]}],
			    {img, 
			     [{src, 
			       "http://laufen-in-wuppertal.de/pix/star.gif"},
			      {alt, VID},
			      {title, "zur Veranstaltung"},
			      {width, 15}, {height,15}]}}
		   end}
      ]},
     %% Optional second row: subject of the last article, linked to it.
     case DoLastSubject of
	 false ->
	     [];
	 true ->
	     [{tr, [RowClass], {td, [],
				foreign_a(Last#article.url,
					  F#forum.url,
					  f_string(Last#article.subject))}}]
     end
    ].


%% Turn a "news:<rest>" URL into the web address
%% "http://www.drsl.de/?mid=<rest>"; every other URL is returned unchanged.
olzo_url(URL) ->
    case URL of
        [$n, $e, $w, $s, $: | MessageId] ->
            "http://www.drsl.de/?mid=" ++ MessageId;
        _ ->
            URL
    end.


%% Link T to an article, taking the forum URL into account.
%% Without a URL, T is returned unlinked.  When both the article URL and
%% the forum URL are "news:" URLs the link goes to the article page built
%% by msg_url/2; otherwise foreign_a/2 decides.
foreign_a(URL, ForumURL, T) ->
    case {URL, ForumURL} of
        {undefined, _} ->
            T;
        {"news:" ++ MID, "news:" ++ Group} ->
            foreign_a(msg_url(MID, Group), T);
        _ ->
            foreign_a(URL, T)
    end.

%% Wrap T in a link to an external page, opened in the top-level frame.
%% Without a URL, T is returned unlinked.  (Opening the link in the named
%% window "mf-ext" via openext() was an alternative that is not active.)
foreign_a(URL, T) ->
    case URL of
        undefined ->
            T;
        _ ->
            Attrs = [{target, "_top"},
                     {href, mangleURL(URL)}],
            {a, Attrs, T}
    end.

%% Produce the href value for a URL.  A "news:" URL is mapped to a web
%% address: to a single message when the part after "news:" contains $@,
%% to a newsgroup otherwise.  Any other URL is HTML-escaped.
mangleURL("news:" ++ Rest) ->
    IsMessageId = lists:member($@, Rest),
    if
        IsMessageId -> news_msg_url(Rest);
        true -> news_group_url(Rest)
    end;
mangleURL(URL) ->
    yaws_api:htmlize(URL).

%% Web address of a single news message: the fixed drsl.de prefix followed
%% by the URL-encoded message id (returned as an iolist).
news_msg_url(MID) ->
    Encoded = yaws_api:url_encode(MID),
    ["http://drsl.de/?mid=", Encoded].

%news_group_url(Name) ->
%    ["http://groups-beta.google.com/group/", yaws_api:htmlize(Name)].

%% Web address of the thread view of a newsgroup (returned as an iolist).
%% The "&" in the fixed prefix is already written as "&amp;"; the group
%% name is HTML-escaped.
news_group_url(Name) ->
    Escaped = yaws_api:htmlize(Name),
    ["http://www.newsoffice.de/groups/index.php?action=thread&amp;group=",
     Escaped].

%% Web address of one article within a newsgroup (returned as an iolist).
%% The message id is wrapped in angle brackets and base64-encoded for the
%% "id" parameter; the group name is appended as given.
msg_url(MID, Group) ->
    Bracketed = lists:append(["<", MID, ">"]),
    EncodedId = http_base_64:encode(Bracketed),
    ["http://www.newsoffice.de/groups/index.php?action=article&id=",
     EncodedId,
     "&group=", Group].

%% Date of the most recent activity in a thread: the date of the last
%% article, or the date of the first article when the last one is missing
%% or undated.
recent_date(#thread{first = FirstArt, last = LastArt}) ->
    LastDate = a_date(LastArt),
    if
        LastDate =:= undefined -> a_date(FirstArt);
        true -> LastDate
    end.

%% Date of an article; a missing article (`undefined') has no date.
a_date(#article{date = When}) ->
    When;
a_date(undefined) ->
    undefined.

%% Class attribute for table row number N: "even" for even N, "odd" for
%% everything else.
row_class(N) when N rem 2 == 0 ->
    {class, "even"};
row_class(_N) ->
    {class, "odd"}.

%% Render one row of the forum table: linked forum name, abbreviation,
%% number of threads and the selection checkbox.
%% A forum whose id is in Selection shows its thread count and a checked
%% box; any other forum shows "n/a" and an unchecked box.
format_forum(N, Selection,
             {#forum{long=Name,url=URL,short=Abbrev,fid=FID},TS}) ->
    Selected = lists:member(FID, Selection),
    {CountCell, CheckedAttr} =
        case Selected of
            true -> {integer_to_list(length(TS)), [checked]};
            false -> {na(), []}
        end,
    Checkbox = {input, CheckedAttr ++ [{type, "checkbox"}, {name,"check"},
                                       {value, FID}]},
    NameCell = {td, [], foreign_a(URL, Name)},
    {tr, [row_class(N)],
     [NameCell,
      {td, [], Abbrev},
      {td, [{align, "right"}], CountCell},
      {td, [], Checkbox}
     ]}.


%% Build the page body: a jump link to the forum overview, the thread table
%% and the forum table.  Data is a list of {Forum, Threads} pairs; it is
%% flattened into one {Thread, Forum} pair per thread for the thread table.
success(A, Data, Selection) ->
    Pairs = lists:append([[{T, F} || T <- TS] || {F, TS} <- Data]),
    JumpLink = {a, [{href, "#forums"}],
                "Zur Forenübersicht und den Einstellungen."},
    {ehtml,
     [{p}, JumpLink, threads(A, Pairs), forums(A, Data, Selection)]}.


%% Build the "Foren" section: a form, posted back to this page, that holds
%% the forum table with one checkbox per forum, a submit button and a
%% pre-checked "save in cookie" checkbox.
forums(A, Data, Selection) ->
    HeaderRow = {tr, [], [{th, [], "Name"}, {th, [], "Kürzel"},
                          {th, [], "Threads"}, {th, [], "Aktiv"}]},
    ForumRows = numbered_map(fun(N, D) -> format_forum(N, Selection, D) end,
                             Data),
    Table = {table, [{border,1}], [HeaderRow | ForumRows]},
    Submit = {input, [{type, "submit"}, {name, "change"},
                      {value, "Auswahl ändern"}]},
    SaveBox = {input, [checked, {type, "checkbox"}, {name, "save"},
                       {value, "ok"}]},
    Form = {form, [{action, A#arg.server_path}, {method, "post"}],
            [Table, Submit, " und in Cookie speichern ", SaveBox]},
    [{h2, [], {a, [{name, "forums"}], "Foren"}},
     {p},
     Form].


%% The longest prefix of the list whose elements all make F return `true'.
%% Any other result of F ends the prefix.
take_while(F, List) ->
    take_while(F, List, []).

%% Accumulator version: Taken holds the accepted elements in reverse order.
take_while(_F, [], Taken) ->
    lists:reverse(Taken);
take_while(F, [H | T], Taken) ->
    case F(H) of
        true -> take_while(F, T, [H | Taken]);
        _ -> lists:reverse(Taken)
    end.

%% Map F over the list, passing the zero-based position of each element as
%% the first argument: [F(0, R0), F(1, R1), ...].
numbered_map(F, Rs) ->
    Step = fun(R, N) -> {F(N, R), N + 1} end,
    {Mapped, _Count} = lists:mapfoldl(Step, 0, Rs),
    Mapped.

%% could be improved
%% Collapse runs of adjacent duplicates in a list of {Thread, Forum} pairs.
%% Two neighbours count as duplicates when both threads have no last
%% article and their first articles carry the same URL; the first entry of
%% such a run is kept.
remove_dups([]) ->
    [];
remove_dups([Head | Rest]) ->
    remove_dups(Head, Rest, []).

%% Kept is the entry currently being compared; Done holds the finished
%% entries in reverse order.
remove_dups(Kept, [], Done) ->
    lists:reverse(Done, [Kept]);
remove_dups(Kept, [Next | Rest], Done) ->
    case same_single_post(Kept, Next) of
        true -> remove_dups(Kept, Rest, Done);
        false -> remove_dups(Next, Rest, [Kept | Done])
    end.

%% True when both entries are threads without a last article whose first
%% articles have the same URL.
same_single_post({#thread{last=undefined,first=#article{url=ID}},_},
                 {#thread{last=undefined,first=#article{url=ID}},_}) ->
    true;
same_single_post(_, _) ->
    false.


%% Read the optional "from_date" query parameter.
%% The value must be written as an Erlang datetime term,
%% "{{Y,M,D},{H,Min,S}}", with nothing after it; the parsed tuple is
%% returned.  A missing or malformed parameter yields `undefined'.
from_date(A) ->
    case queryvar(A, "from_date") of
        {ok, Text} -> parse_from_date(Text);
        _ -> undefined
    end.

%% Parse the textual datetime term; `undefined' when it does not match.
parse_from_date(Text) ->
    case io_lib:fread("{{~d,~d,~d},{~d,~d,~d}}", Text) of
        {ok, [Y, M, D, H, Min, S], []} ->
            {{Y, M, D}, {H, Min, S}};
        _ ->
            undefined
    end.

%% Build the "Diskussionen" section from a list of {Thread, Forum} pairs.
%% The threads are sorted by most recent activity (newest first) and
%% adjacent duplicates are removed.  Without a "from_date" query parameter
%% at most TLimit threads are shown; with one, only the threads at least as
%% recent as that date.  The section also holds a link that reloads the
%% page with from_date set to the current local time.
threads(A, Data0) ->
    TLimit = 50,
    Data1 = remove_dups(
	      lists:sort(fun({X,_},{Y,_}) -> 
				 recent_date(X)>recent_date(Y) end,
			 Data0)),
    %% L is the number of threads found, before any cut-off is applied.
    L = length(Data1),
    %% Threads: the rows to render; StripComment: the note telling the
    %% reader which part of the list is shown ([] when nothing was cut).
    {Threads, StripComment} =
	case from_date(A) of
	    undefined ->
		case L > TLimit of
		    true ->
			{lists:sublist(Data1, TLimit),
			 [{p},"Die ",integer_to_list(TLimit),
			  " aktuellsten folgen."]};
		    false ->
			{Data1, []}
		end;
	    FromDate ->
		%% Data1 is sorted newest first, so the wanted threads form
		%% a prefix of the list.
		{take_while(fun({X,_}) -> recent_date(X) >= FromDate end,
			    Data1),
		 [{p}, "Artikel ab ", f_date(FromDate), " folgen."]}
	end,
    [{h2, [], {a, [{name, "threads"}], "Diskussionen"}},
     {p},
     %% Link back to this page with the current local time as from_date.
     {a, [{href, [A#arg.server_path, "?from_date=", 
		  yaws_api:url_encode(lists:flatten(
					f("~w",[calendar:local_time()])))
		 ]}
	  %%,{rel, "nofollow"}
	 ],
      "Neue Artikel holen."},
     {p},
     f("~p Threads wurden gefunden und nach Aktualität sortiert.", [L]),
     StripComment,
     {table, [{border,1}], 
      [{tr, [], [{th, [{colspan,2}], "Forum"}, 
		 {th, [], "Letzter Beitrag"},
		 {th, [], "Thema"},
		 {th, [], "Erster Beitrag"},
		 {th, [], "Antw."},
		 {th, [], "VID"}]}
       | numbered_map(fun format_thread/2, Threads)]}].
     


</erl>

<p id="seitenfuss">Diese Seite ist Teil eines Framesets.
<br>Startseite:
<a href="http://www.drsl.de/metafor/"
   target="_top">http://www.drsl.de/metafor/</a></p>

</body>
</html>



Cache

Zwischen dem Frontend und dem Backend vermittelt ein Erlang-Modul, das unter anderem dafür sorgt, dass zu jedem Zeitpunkt höchstens eine Instanz des Backends läuft. Außerdem werden die vom Backend gelieferten Daten für eine kurze Zeit zwischengespeichert.

%% Mediates between the Yaws frontend and the external backend program:
%% a single registered server process holds the cache and runs the backend.
-module(metafor).

%% main_loop/1 is exported because the server loop re-enters itself through
%% a fully qualified ?MODULE:main_loop/1 call.
-export([get_threads/1, main_loop/1, start/1]).

%% Lifetime of a cache entry in milliseconds (three minutes); get_q/3
%% multiplies it by 1000 to compare it with timer:now_diff/2.
-define(cache_timeout, 180000).

%% Forum description as delivered by the backend; only `fid' is read in
%% this module.
-record(forum, {short, long, url, pic, hpurl, fid}).

%% Start the cache server with an empty cache, linked to the caller, and
%% register it as `metafor_server'.  The argument is ignored.
start(_SC) ->
    register(metafor_server,
             proc_lib:spawn_link(fun() -> main_loop([]) end)).

%% Server loop; the argument is the current cache.
%% A {Pid, metafor_req, Forums} request refreshes the cache for the given
%% forums and is answered with {metafor_ans, Answer}.  Every other message
%% is discarded.  The loop continues through a fully qualified call.
main_loop(Cache) ->
    NextCache =
        receive
            {Client, metafor_req, Wanted} ->
                Updated = update_data(Cache, Wanted),
                Client ! {metafor_ans, prepare_answer(Updated, Wanted)},
                Updated;
            _Other ->
                Cache
        end,
    ?MODULE:main_loop(NextCache).

%% Client API: ask the registered server for the threads of the given
%% forums and wait, without a timeout, for its answer.
get_threads(Forums) ->
    Request = {self(), metafor_req, Forums},
    erlang:send(metafor_server, Request),
    receive
        {metafor_ans, Reply} -> Reply
    end.

%% Cache format:
%% [{Forum, {TimeStamp, Data} | undefined}]

%% Decide whether forum FID has to be fetched from the backend.
%% True when the cache has no entry for it, when the first matching entry
%% holds no data, or when that entry is older than the cache timeout.
get_q(FID, Now, Cache) ->
    Matches = [FData || {#forum{fid = Id}, FData} <- Cache, Id =:= FID],
    case Matches of
        [] ->
            true;
        [undefined | _] ->
            true;
        [{TS, _} | _] ->
            %% now_diff/2 counts microseconds, the timeout is in milliseconds.
            timer:now_diff(Now, TS) > ?cache_timeout * 1000
    end.

%% Turn the backend result Xs, a list of {Forum, Threads} pairs, into the
%% new cache.  A forum that came back with threads is stamped with Now; a
%% forum that came back empty keeps whatever the old cache held for it.
merge_with_cache(Xs, Cache, Now) ->
    Merge = fun({F, []}) -> mwc(F#forum.fid, F, Cache);
               ({F, D}) -> {F, {Now, D}}
            end,
    [Merge(X) || X <- Xs].

%% Pair forum F with the cached data of the first cache entry whose forum
%% id is FID, or with `undefined' when the cache has no such entry.
mwc(_FID, F, []) ->
    {F, undefined};
mwc(FID, F, [{#forum{fid = FID}, Cached} | _]) ->
    {F, Cached};
mwc(FID, F, [_ | Rest]) ->
    mwc(FID, F, Rest).


%% Bring the cache up to date for the requested forums.
%%   Data   - the current cache, [{Forum, {TimeStamp, Threads} | undefined}].
%%   Forums - the ids of the forums the client asked for.
%% Returns the new cache.  When every requested forum is still fresh and
%% the cache is not empty, the cache is returned unchanged.  Otherwise the
%% backend is run for the stale forums and its answer is merged into the
%% cache.  If the backend ends without delivering data, or does not answer
%% within 30 seconds, the old cache is returned.
update_data(Data, Forums) ->
    Now=now(),
    ForumsToGet = lists:filter(
                    fun(F) -> get_q(F, Now, Data) end,
                    Forums),
    if (ForumsToGet == []) and (Data /= []) ->
            %% Serving completely from the cache.
            Data;
       true ->
            fetch_from_backend(ForumsToGet, Data)
    end.

%% Run the backend for ForumsToGet and merge its answer into Data.
fetch_from_backend(ForumsToGet, Data) ->
    Port = open_port({spawn,
                      "/home/carsten/cgi/metafor1 +RTS -H10m -M20m"},
                     [{cd, "/tmp"},
                      exit_status,
                      {packet, 4},
                      binary]),
    port_command(Port, term_to_binary(ForumsToGet)),
    receive
        {Port, {data, Bin}} when is_binary(Bin) ->
            Now1 = now(),
            merge_with_cache(binary_to_term(Bin), Data, Now1);
        {Port, _} ->
            %% Any other port message (e.g. the exit status): no data.
            Data
    after 30000 ->
            %% Give up on this backend run.  The port is closed so that it
            %% is not left open while a later request opens a new one.
            close_port(Port),
            Data
    end.

%% Close a port, tolerating the case that it has already closed itself
%% because the external program ended in the meantime.
close_port(Port) ->
    try
        port_close(Port)
    catch
        error:badarg -> true
    end.



%% Build the answer for a client from the cache: one {Forum, Threads} pair
%% per cache entry.  The threads are included only for forums whose id is
%% in Forums; entries without cached data and forums that were not
%% requested get an empty thread list.
prepare_answer(Data, Forums) ->
    Select = fun({F, undefined}) ->
                     {F, []};
                ({F, {_TS, D}}) ->
                     Wanted = lists:member(F#forum.fid, Forums),
                     if
                         Wanted -> {F, D};
                         true -> {F, []}
                     end
             end,
    [Select(Entry) || Entry <- Data].