Archive

Archive for the ‘erlang’ Category

Erlang’s Dynamism

June 30th, 2009 Daniello 3 comments

This is a more verbose answer to @bubbafat’s Twitter question. It started like this:

  • @bubbafat: Functions used by spawn to start a process must be exported. Why doesn’t erl compiler error when this is missed?  #erlang
  • @danielllo: @bubbafat ‘cos it is possible to define and load  new modules later on during runtime in #erlang
  • @bubbafat: @danielllo Thx. Do you mean redefining the module at runtime or that the func might resolve in a different module loaded later? #erlang
  • @danielllo: You have many ways of referencing #erlang module:function not known at compile time.
    1. you can load the precompiled module at later time from a path not being provided to compiler.
    2. you can use M:F(Args) function invocation in #erlang, any of M, F, Args being variables dynamically referencing module, function and argument list.
    3. you can construct an Erlang AST programmatically, compile it at runtime and load the resulting beam.
    4. you can achieve the result of point 3 using helper tools such as LFE, Smerl or  “Dynamic” module generation with compile-time macros
    5. … and I’m sure there is more ;)
Categories: erlang, programming Tags:

How to redirect in webmachine

June 17th, 2009 Daniello 2 comments

Recently I was looking for an example of how to do a redirect in webmachine. Unfortunately I haven’t found one, so I started figuring it out by myself. After some trial and error using the brilliant wmtrace_resource, it turned out to be trivial ;). Here is the example:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
%% @doc Example of redirect webmachine_resource.
 
-module(redirect_resource).
-export([init/1, resource_exists/2, moved_temporarily/2, previously_existed/2]).
 
-include_lib("webmachine/include/webmachine.hrl").
 
%% @doc Webmachine init callback. Accepts only the empty argument list from
%% the dispatch rule and starts each request with an `undefined' context.
init([]) ->
  {ok, undefined}.
 
%% @doc Redirects to the location carried, base64-encoded, in the `site'
%% path token of the request.
%%
%% Returns `{{true, Location}, ReqData, Context}' when the token decodes.
%% The token comes straight from the request path, so a value that is not
%% valid base64 is answered with `{halt, 400}' instead of crashing the
%% request.
%%
%% NOTE(review): the decoded location is used as-is, so this resource will
%% redirect to whatever URL the client supplies. Restrict the accepted
%% targets if an open redirect is not intended.
moved_temporarily(ReqData, Context) ->
  Site = wrq:path_info(site, ReqData),
  try base64:decode(Site) of
    Location ->
      {{true, Location}, ReqData, Context}
  catch
    % Malformed client input: report a bad request rather than a crash.
    error:_ ->
      {{halt, 400}, ReqData, Context}
  end.
 
%% @doc Always answers `true': the resource is reported as having existed
%% before. Presumably this is what makes webmachine go on to consult
%% moved_temporarily/2 -- confirm against the webmachine decision flow.
previously_existed(ReqData, Context) ->
  {true, ReqData, Context}.
 
%% @doc Always answers `false': this resource never serves content itself,
%% it only exists to redirect.
resource_exists(ReqData, Context) ->
  {false, ReqData, Context}.

The resource is mapped in my dispatch.conf as
{["redirect", site], redirect_resource, []}.
You can do a request like http://host.com/redirect/aHR0cDovL2xhbWJkZXIuY29t
The redirect_resource will interpret the last token in the path as base64 encoded location to redirect to.

Scraping google results in Erlang - the sequel (how to do it securely)

June 13th, 2009 Daniello No comments

In the last post I presented how to use google search service from erlang. But what if we want to do it securely and anonymously? We can use TOR. We can use TOR for that purpose even without installing TOR on our machine by using scroogle.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
-module(scroogle_scrapper).
 
-compile(export_all).
 
-define(SCROOGLE_URL, "https://ssl.scroogle.org/cgi-bin/nbbw.cgi").
-define(SCROOGLE_PEM, "[PATH_TO_SSL_SCROOGLE_ORG_PEM_CERTIFICATE]").
 
%% @doc Starts the inets and ssl applications needed for HTTPS requests.
%% The result of starting inets is not checked; the value returned is
%% whatever `ssl:start/0' returns.
start() ->
  _InetsResult = inets:start(),
  ssl:start().
 
%% @doc POSTs the query `Q' to Scroogle over SSL and returns the result page.
%%
%% Returns the response body as a binary for any 200 response,
%% `{error, {http_status, Code, Reason}}' for any other HTTP status, and
%% `{error, Reason}' when the request itself fails. The HTTP version and the
%% reason phrase are not matched, so a reply such as "HTTP/1.0 200" is still
%% treated as success.
%%
%% NOTE(review): `http' is the inets client module this code was written
%% against; later OTP releases provide the same call as `httpc:request/4'
%% -- confirm against the OTP version in use.
fetch_scroogle_results(Q) ->
  % We want binary as a result
  Options = [{body_format, binary}],
  HTTPOptions = [{ssl, [{cacertfile, ?SCROOGLE_PEM},{verify, 2}]}],
  ReqBody = "Gw="++url_encoder:encode(Q)++"&n=1",
  Request = {?SCROOGLE_URL, [], "application/x-www-form-urlencoded", ReqBody},
  case http:request(post, Request, HTTPOptions, Options) of
    {ok, {{_Version, 200, _Reason}, _Headers, Body}} ->
      Body;
    {ok, {{_Version, Status, Reason}, _Headers, _Body}} ->
      {error, {http_status, Status, Reason}};
    {error, Error} ->
      {error, Error}
  end.
 
 
%% @doc Applies `Fun' to every match of the regular expression `RE' in `B'.
%%
%% Matching is global, case-insensitive, unicode-aware, dotall and
%% multiline. Each match is handed to `Fun' as a list of binaries: the whole
%% match first, then one entry per capture group. Returns the list of
%% results, or `[]' when nothing matches.
parse(B, RE, Fun) ->
  RunOptions = [global, caseless, unicode, dotall, multiline, {capture, all, binary}],
  case re:run(B, RE, RunOptions) of
    nomatch -> [];
    {match, Found} -> [Fun(Captures) || Captures <- Found]
  end.
 
%% @doc Extracts the numbered result links from a Scroogle result page `B'.
%%
%% Each numbered entry ("1. <a href=...>...</a>") yields one element: the
%% list of `{Href, Name}' pairs that parse_result/1 finds in its anchor.
%%
%% The dot after the number is escaped twice so the regex sees a literal
%% `\.'; a single backslash is consumed by the Erlang string and leaves a
%% match-anything `.'. Only the captured anchor is passed on. Passing the
%% whole match list would be read as iodata (whole match followed by the
%% capture), so every link would be reported twice.
parse_results(B) ->
  RE = "[0-9]+?\\.[[:space:]]+(<a href=.+?</a>)",
  parse(B, RE, fun([_Whole, Anchor]) -> parse_result(Anchor) end).
 
 
%% @doc Extracts `{Href, Name}' pairs from the anchor tags in `GResult'.
%%
%% `Href' is the value of the href attribute and `Name' the text between the
%% opening and closing tag, both as binaries. The double quotes inside the
%% pattern are escaped so that the regex is a single valid string literal.
parse_result(GResult) ->
  RE = "<a href=\"(.*?)\".*?>(.*?)</a>",
  ToPair = fun([_Match, Href, Name]) -> {Href, Name} end,
  parse(GResult, RE, ToPair).

You can use it as described in previous post.

Scraping google results in Erlang

June 11th, 2009 Daniello 1 comment

Currently, for both legal and technical reasons, my full music albums search app (to be published soon) is using external search indices rather than building its own.
Among those I plan to use is google search engine. The app needs it to get links to pages containing links to mp3 streams my app is passing to the user.
So effectively I’m going to build a google search result scraper. Here is how it could look:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
-module(google_scrapper).
 
-compile(export_all).
 
-define(GOOGLE_URL, "http://www.google.co.uk/search?hl=en&btnG=Search&meta=&q=").
 
%% @doc Starts the inets application, which provides the HTTP client used by
%% fetch_google_results/1. Returns whatever `inets:start/0' returns.
start() ->
  inets:start().
 
%% @doc Fetches the Google result page for the query `Q'.
%%
%% Returns the response body as a binary for any 200 response,
%% `{error, {http_status, Code, Reason}}' for any other HTTP status, and
%% `{error, Reason}' when the request itself fails. The HTTP version and the
%% reason phrase are not matched, so a reply such as "HTTP/1.0 200" is still
%% treated as success.
%%
%% NOTE(review): `http' is the inets client module this code was written
%% against; later OTP releases provide the same call as `httpc:request/4'
%% -- confirm against the OTP version in use.
fetch_google_results(Q) ->
  % Let the HTTP client follow redirects for us
  HTTPOptions = [{autoredirect, true}],
  % We want binary as a result
  Options = [{body_format, binary}],
  Headers = [
    % Let's be Firefox ;)
    {"User-Agent", "Mozilla/5.0 (Macintosh; U; Intel Mac OS X 10.5; en-US; rv:1.9.0.10) Gecko/2009042315 Firefox/3.0.10"},
    {"Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,*/*;q=0.8"},
    % I want the result be UTF-8 encoded
    {"Accept-Charset", "utf-8;q=0.7,*;q=0.7"}
  ],
  Request = {?GOOGLE_URL++url_encoder:encode(Q), Headers},
  case http:request(get, Request, HTTPOptions, Options) of
    {ok, {{_Version, 200, _Reason}, _RespHeaders, Body}} ->
      Body;
    {ok, {{_Version, Status, Reason}, _RespHeaders, _Body}} ->
      {error, {http_status, Status, Reason}};
    {error, Error} ->
      {error, Error}
  end.
 
 
%% @doc Runs the regular expression `RE' over `B' and maps `Fun' over the
%% matches.
%%
%% The search is global, case-insensitive, unicode-aware, dotall and
%% multiline. `Fun' receives each match as a list of binaries (the whole
%% match followed by the capture groups). Returns `[]' when there is no
%% match.
parse(B, RE, Fun) ->
  ReOpts = [global, caseless, unicode, dotall, multiline, {capture, all, binary}],
  case re:run(B, RE, ReOpts) of
    nomatch ->
      [];
    {match, Hits} ->
      [Fun(Hit) || Hit <- Hits]
  end.
 
%% @doc Extracts the search results from a Google result page `B'.
%%
%% Every section delimited by the `<!--m-->' and `<!--n-->' comments yields
%% one element: the list of `{Href, Name}' pairs that
%% parse_google_result/1 finds in it.
%%
%% Only the captured section is passed on. Passing the whole match list
%% would be read as iodata (whole match followed by the capture), so every
%% result would be reported twice.
parse_google_results(B) ->
  RE = "<!--m-->(.*?)<!--n-->",
  parse(B, RE, fun([_Whole, Section]) -> parse_google_result(Section) end).
 
 
%% @doc Extracts `{Href, Name}' pairs from the result entries in `GResult'.
%%
%% For each `<li class=g' entry, `Href' is the target of the first link
%% inside its `<h3' heading and `Name' is that link's inner HTML, both as
%% binaries. The double quotes inside the pattern are escaped so that the
%% regex is a single valid string literal.
parse_google_result(GResult) ->
  RE = "<li class=g.*?<h3.*?<a href=\"(.*?)\".*?>(.*?)</a>",
  ToPair = fun([_Match, Href, Name]) -> {Href, Name} end,
  parse(GResult, RE, ToPair).

As you could notice I’ve used url_encoder:encode/1 function. The standard OTP doesn’t contain one but you can get it either from gist.github.com/127917 ,
ibrowse or
yaws

You can use it by typing:

google_scrapper:start().
B = google_scrapper:fetch_google_results("google images copyright").
R = google_scrapper:parse_google_results(B).

The result should be similar to:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
[
...
[{<<"http://www.lawdit.co.uk/reading_room/room/view_article.asp?name=../articles/Google%20Sued%20"...>>,
<<"<em>Google</em> Sued for <em>Copyright</em> Infringement through Use of &#39;<em>Google<"...>>},
{<<"http://www.lawdit.co.uk/reading_room/room/view_article.asp?name=../articles/Google%20Sue"...>>,
<<"<em>Google</em> Sued for <em>Copyright</em> Infringement through Use of &#39;<em>Goo"...>>}],
[{<<"http://www.mahalo.com/Google_Images_Copyright_Infringement">>,
<<"<em>Google Images Copyright</em> Infringement - Mahalo">>},
{<<"http://www.mahalo.com/Google_Images_Copyright_Infringement">>,
<<"<em>Google Images Copyright</em> Infringement - Mahalo">>}],
[{<<"http://www.goossip.com/2008/10/google-images-loses-two-copyright-cases.html">>,
<<"<em>Google Images</em> loses two <em>copyright</em> cases in Germany - Goossip <"...>>},
{<<"http://www.goossip.com/2008/10/google-images-loses-two-copyright-cases.html">>,
<<"<em>Google Images</em> loses two <em>copyright</em> cases in Germany - Gooss"...>>}]
…]
Categories: erlang, programming Tags:

providing static content in webmachine

June 2nd, 2009 Daniello 3 comments

Recently I was migrating my web app from plain vanilla mochiweb to webmachine. One snag I hit was how to serve static content. By default, Mochiweb serves static content from its ./priv/www directory, but I haven’t found a similar convention in webmachine. Indeed, there is none. Instead, one has to create a static content resource. I created the webmachine static resource below just for that purpose; feel free to use it in your projects.

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
%% @author author <daniel.kwiecinski@lambder.com>
%% @copyright Daniel Kwiecinski.
%% @doc Static webmachine resource.
 
-module(static_resource).
-export([init/1, allowed_methods/2,
         content_types_provided/2, resource_exists/2, last_modified/2, provide_content/2]).
 
-include_lib("webmachine/include/webmachine.hrl").
-include_lib("kernel/include/file.hrl").
-record(context, {docroot,fullpath,fileinfo}).
 
%% @doc Webmachine init callback. Stores the dispatch argument `DocRoot' in
%% a fresh context record; the other fields stay unset until later callbacks
%% fill them in.
init(DocRoot) ->
  InitialContext = #context{docroot = DocRoot},
  {ok, InitialContext}.
 
%% @doc Reports whether the requested path maps to a regular file under the
%% document root.
%%
%% On success the file's `#file_info{}' is stored in the context so that
%% last_modified/2 can read it. Unsafe paths, missing files and anything
%% that is not a regular file all answer `false'.
%%
%% The file is inspected with a single `file:read_file_info/1' call whose
%% result is matched on `type = regular'. This avoids statting the path a
%% second time just to check its type.
resource_exists(ReqData, Context) ->
  case get_full_path(Context#context.docroot, wrq:disp_path(ReqData)) of
    undefined ->
      {false, ReqData, Context};
    Path ->
      case file:read_file_info(Path) of
        {ok, #file_info{type = regular} = FileInfo} ->
          {true, ReqData, Context#context{fileinfo = FileInfo}};
        _MissingOrNotRegular ->
          {false, ReqData, Context}
      end
  end.
 
%% @doc Offers a single content type, guessed from the requested file name,
%% served by provide_content/2. The resolved path is stored in the context's
%% `fullpath' field for provide_content/2 to read.
content_types_provided(ReqData, Context) ->
  DispPath = wrq:disp_path(ReqData),
  Path = get_full_path(Context#context.docroot, DispPath),
  MimeType = webmachine_util:guess_mime(Path),
  NewContext = Context#context{fullpath = Path},
  {[{MimeType, provide_content}], ReqData, NewContext}.
 
%% @doc Static content is read-only: only HEAD and GET are allowed.
allowed_methods(ReqData, Context) ->
  Methods = ['HEAD', 'GET'],
  {Methods, ReqData, Context}.
 
%% @doc Returns the modification time recorded in the `#file_info{}' that
%% resource_exists/2 stored in the context.
last_modified(ReqData, Context) ->
  FileInfo = Context#context.fileinfo,
  ModifiedAt = FileInfo#file_info.mtime,
  {ModifiedAt, ReqData, Context}.
 
%% @doc Reads the file at the context's `fullpath' and returns its contents
%% as the response body. Crashes with a badmatch if the file cannot be read.
%%
%% The debug print of the context through the project-local `util' module
%% has been dropped: that module is not part of this listing or its
%% includes, so the resource could not be reused without it.
provide_content(ReqData, Context) ->
  {ok, Value} = file:read_file(Context#context.fullpath),
  {Value, ReqData, Context}.
% ------------------ PRIVATE ------------------------
 
 
%% @doc Maps a request path onto the file system below `DocRoot'.
%%
%% Returns `undefined' when `mochiweb_util:safe_relative_path/1' rejects the
%% path. Otherwise returns the joined path, with "index.html" appended when
%% that path is a directory.
get_full_path(DocRoot, Path) ->
  resolve_path(DocRoot, mochiweb_util:safe_relative_path(Path)).

%% Joins an already-checked relative path onto the document root; passes
%% `undefined' (a rejected path) straight through.
resolve_path(_DocRoot, undefined) ->
  undefined;
resolve_path(DocRoot, RelPath) ->
  Candidate = filename:join([DocRoot, RelPath]),
  case filelib:is_dir(Candidate) of
    false -> Candidate;
    true -> filename:join([Candidate, "index.html"])
  end.

Bon Appétit