Archive for September, 2008

Transcoding HTTP mp3 streaming proxy in bash

Here’s how to make a proxy for streaming mp3s. It transcodes on-the-fly to 64kbps MP3 using lame. When transcoding is finished, it calls the ./posthandler.sh script, which can either just delete the file, or potentially archive it so you don’t need to transcode it again.

  #!/bin/bash
  # Transcoding HTTP proxy handler.
  #
  # Reads one HTTP request line from stdin, fetches the URL named in the
  # request path, transcodes it to constant-bitrate MP3 with lame, and streams
  # the result to stdout while also saving a copy in $OUTFILE. When lame
  # finishes successfully, ./posthandler.sh is started with the saved file.
  # Meant to be run once per connection (stdin/stdout wired to the socket).

  CR=$'\r'    # request lines end in CRLF; `read` only strips the LF

  # Request line looks like: GET /http://host/file.mp3 HTTP/1.0
  read -r method url version

  method="${method%"$CR"}"
  url="${url%"$CR"}"
  version="${version%"$CR"}"

  echo -ne "HTTP/1.0 200 OK\r\nContent-type: audio/mpeg\r\n\r\n"

  BR=64 # bitrate to transcode to.
  PIPE="/tmp/$$.pipe"
  mkfifo "$PIPE"

  OUTFILE="./tmp.$$.$BR.mp3"
  rm -f -- "$OUTFILE"     # -f: no error if a stale file does not exist
  url="${url#/}"          # drop the leading "/" of the request path
  echo "** GET $url" >&2

  # NOTE(review): lynxsource is assumed to be a local helper that writes the
  # body of the URL to stdout (e.g. a wrapper around `lynx -source`) - confirm.
  #
  # lame reads MP3 from stdin ("-") and writes MP3 to stdout ("-").
  # posthandler's stdout is sent to stderr so that nothing it prints ends up
  # in the audio stream that is piped into tee.
  # NOTE(review): posthandler starts as soon as lame exits, which may be
  # before tee has written the last bytes to $OUTFILE - confirm it tolerates that.
  nohup lynxsource "$url" \
      | (lame --preset cbr "$BR" --mp3input - - 2>/dev/null \
        && (echo "** Finished transcoding $url" >&2 ; \
            ./posthandler.sh "$OUTFILE" >&2 &)) \
      | tee -i "$PIPE" > "$OUTFILE" &

  # Stream the transcoded data to the client as it arrives in the fifo.
  cat < "$PIPE"
  rm -f -- "$PIPE"


One interesting limitation seems to be the buffer size of a fifo pipe in linux. Even though the transcoding step is pretty quick, if a client is connected the transcoding only manages to fill the pipe a couple of hundred k ahead of what is being read.

The -i flag to `tee` means it ignores interrupts, and will finish transcoding the file and call the posthandler even if the client disconnects.

Run it like this:

while [ 1 ]; do nc -vlp 8080 -c './transstreamer.sh' ; done

Then hit up a url of your choice using your awesome new proxy:

mpg321 "http://localhost:8080/http://freedownloads.last.fm/download/105468518/Letters%2BFrom%2BThe%2BBoatman.mp3"

Not the most scalable solution, but a mildly amusing quick hack.

Tags: , , ,

Monday, September 29th, 2008 hacks, programming No Comments

Erlang libketama driver – Consistent Hashing

All the data I need from memcached is assigned to servers using a consistent hashing mechanism, implemented as libketama – a shared library written in C. We use a php extension to wrap this, and also have a pure java implementation. Rather than port the algorithm to Erlang, I wrote an Erlang driver.

There are 3 things covered here:

  • A small driver program written in C (using libketama)
  • Some basic testing from the shell using Perl and xxd
  • The Erlang gen_server that calls it

C driver program

  1. /*  Expects a one-byte length header, followed by a key (<255bytes)
  2.  *  Returns an ip:port string with 1 byte len header
  3.  *
  4.  */
  5. #include <ketama.h>
  6. #include <stdio.h>
  7. #include <stdlib.h>
  8. #include <unistd.h>
  9. #include <string.h>
  10.  
  11. typedef unsigned char byte;
  12.  
  13. int read_exact(byte *buf, int len)
  14. {
  15.     int i, got = 0;
  16.     do {
  17.         if((i=read(0,buf+got, len-got))<=0) return i;
  18.         got += i;
  19.     } while(got<len);
  20.     return len;
  21. }
  22.  
  23. int main(int argc, char **argv)
  24. {
  25.     if(argc==1){
  26.         printf("Usage: %s <ketama.servers file>\n", *argv);
  27.         return 1;
  28.     }
  29.  
  30.     ketama_continuum c;
  31.     ketama_roll( &c, *++argv );
  32.     mcs *m;
  33.  
  34.     byte len;
  35.     byte buffer[256];
  36.     while ( 1 ) {
  37.         if( 1 != read_exact(&len, 1) ) break;
  38.         if( (int)len >= 255 ) break;
  39.         read_exact((byte *)&buffer, (int)len);
  40.         buffer[len] = \0;
  41.         m = ketama_get_server( (char *) &buffer, c );
  42.         sprintf((char *)&buffer, "%s",m->ip);
  43.         int respleni = strlen(m->ip);
  44.         char l = (0xff & respleni);
  45.         write(1, &l, 1);
  46.         write(1, (char*)&buffer, respleni);
  47.     }
  48.  
  49.     return 0;
  50. }


Testing the driver with Perl and xxd

Before writing the Erlang bit, it’d be nice to know the driver program does what we expect. We’ll send the driver a 1-byte length header followed by the key, and expect a 1-byte length header and the value as a response. Say we’re hashing a memcached key ‘user:123’ to a server: we can do what the Erlang port does with a bit of Perl, and use the ‘xxd’ command to see the output in binary.

perl -e '$key="user:123"; $len=pack("C",length($key)); print $len; print $key;' | xxd -b

0000000: 00001000 01110101 01110011 01100101 01110010 00111010  .user:
0000006: 00110001 00110010 00110011                             123

Note the first byte (00001000) printed before the key is the length of the key, 8. Now let’s send this to the driver program and check the response (provide a valid ketama.servers file):

perl -e '$key="user:123"; $len=pack("C",length($key)); print $len; print $key;' | ./ketama_erlang_driver /var/ketama.servers | xxd -b

0000000: 00010000 00110001 00110000 00101110 00110000 00101110  .10.0.
0000006: 00110001 00101110 00110001 00110001 00111000 00111010  1.118:
000000c: 00110001 00110001 00110010 00110001 00110001           11211

The first byte of the response (00010000) is 16, which is the length of the server address returned by the driver, “10.0.1.118:11211” – it does what we expect, onwards…

The Erlang bit

  %% gen_server wrapper around the external ketama driver program.
  %% The driver is run as a port with a one-byte length header on every
  %% message ({packet, 1}); a key is sent and the mapped server comes back.
  -module(ketama).
  -behaviour(gen_server).
  -export([start_link/0, start_link/1, start_link/2, getserver/1]).
  -export([init/1, handle_call/3, handle_cast/2, handle_info/2,
           terminate/2, code_change/3]).

  -record(state, {port}).

  %% Start the server with the default servers file and driver binary.
  start_link() ->
      start_link("/web/site/GLOBAL/ketama.servers").

  %% Start the server with a custom servers file and the default driver binary.
  start_link(ServersFile) ->
      start_link(ServersFile, "/usr/bin/ketama_erlang_driver").

  %% Start the server, registered locally under the module name.
  start_link(ServersFile, BinPath) ->
      gen_server:start_link({local, ?MODULE}, ?MODULE, [ServersFile, BinPath], []).

  %% Ask the driver which server Key hashes to. Returns the driver's reply
  %% as a binary (the port is opened in binary mode).
  getserver(Key) ->
      gen_server:call(?MODULE, {getserver, Key}).

  %%

  init([ServersFile, BinPath]) ->
      %% Trap exits so that the death of the driver arrives as an
      %% {'EXIT', Port, Reason} message in handle_info/2.
      process_flag(trap_exit, true),
      Exe = BinPath ++ " " ++ ServersFile,
      Port = open_port({spawn, Exe}, [binary, {packet, 1}, use_stdio]),
      {ok, #state{port=Port}}.

  handle_call({getserver, Key}, _From, #state{port=Port} = State) ->
      Port ! {self(), {command, Key}},
      receive
          {Port, {data, Data}} ->
              {reply, Data, State}
          after 1000 -> % if it takes this long, you have serious issues.
              {stop, ketama_port_timeout, State}
      end.

  handle_cast(_Msg, State) ->    {noreply, State}.

  %% The driver exited: stop, since no further lookups can be answered.
  handle_info({'EXIT', Port, Reason}, #state{port = Port} = State) ->
      {stop, {port_terminated, Reason}, State};
  %% Ignore anything else rather than crashing with function_clause.
  handle_info(_Info, State) ->
      {noreply, State}.

  terminate({port_terminated, _Reason}, _State) ->    ok;
  terminate(_Reason, #state{port = Port} = _State) ->
      try port_close(Port) of
          true -> ok
      catch
          error:badarg -> ok    % port was already closed
      end.

  code_change(_OldVsn, State, _Extra) ->     {ok, State}.



This code can be found in the erlang directory of the ketama source in svn:
svn://svn.audioscrobbler.net/misc/ketama/

Tags: , , , , ,

Sunday, September 28th, 2008 programming 9 Comments

Reading Serialized PHP Objects from Erlang

I started writing some Erlang recently. The vast majority of data I need to access from Erlang resides in cached, serialized php objects. Here’s what I came up with to turn a serialized php object into a sort of nested Erlang proplist thing.

  <?php
  // Example value: integer, string, float and a nested associative array.
  $s = array(123, 'hello', 3.14, array('a'=>'foo', 'b'=>'bar'));
  ?>

This gives:

a:4:{i:0;i:123;i:1;s:5:"hello";i:2;d:3.14;i:3;a:2:{s:1:"a";s:3:"foo";s:1:"b";s:3:"bar";}}

It’s not hard to see how the (relatively undocumented) PHP serialization format works. Here’s what it becomes in Erlang:


1> php:unserialize("a:4:{i:0;i:123;i:1;s:5:\"hello\";i:2;d:3.14;i:3;a:2:{s:1:\"a\";s:3:\"foo\";s:1:\"b\";s:3:\"bar\";}}").

{[[{0,123},
{1,<<"hello">>},
{2,3.14},
{3,[{a,<<"foo">>},{b,<<"bar">>}]}]],
[]}

Here’s what it does with objects:

  <?php
  // Example object with an integer, a string and an array property.
  class ExampleClass {
      var $id = 123;
      var $name = "RJ";
      var $languages = array('php', 'erlang', 'etc');
  }
  $s = new ExampleClass();
  $ser = serialize($s);   // serialized form, as fed to php:unserialize/1
  ?>


2> php:unserialize("O:12:\"ExampleClass\":3:{s:2:\"id\";i:123;s:4:\"name\";s:2:\"RJ\";s:9:\"languages\";a:3:{i:0;s:3:\"php\";i:1;s:6:\"erlang\";i:2;s:3:\"etc\";}}").
{[{class,"ExampleClass",
[{id,123},
{name,<<"RJ">>},
{languages,[{0,<<"php">>},
{1,<<"erlang">>},
{2,<<"etc">>}]}]}],
[]}

Due to a combination of PHP’s “relaxed” type system, an old database abstraction library, and munging things in and out of memcached, we sometimes end up with numeric properties, such as ‘id’, represented as strings by PHP. To mitigate this, I ended up with some nasty code that forces certain properties to a predefined type (“id” is always an int, etc..). Yuk. Anyway, here’s the Erlang module:

  %
  % Takes a serialized php object and turns it into an erlang data structure
  %
  -module(php).
  -author('Richard Jones <rj at last.fm>').
  -export([unserialize/1]).

  % Usage:  {Result, Leftover} = php:unserialize(...)
  %
  % Result is a list holding the parsed top-level value; Leftover is whatever
  % input remained after it. Arrays become [{Key, Value}] lists, objects become
  % {class, Classname, [{Key, Value}]}, and strings become binaries.

  unserialize(S) when is_binary(S)    -> unserialize(binary_to_list(S));
  unserialize(S) when is_list(S)      -> takeval(S, 1).

  % Internal stuff

  % Parse Num consecutive values from Str.
  % Returns {Values, Rest} with Values in input order.
  takeval(Str, Num) ->
      {Parsed, Remains} = takeval(Str, Num, []),
      { lists:reverse(Parsed), Remains }.

  % All values taken: also swallow the "}" that closes an array/object body.
  takeval([$} | Leftover], 0, Acc)    -> {Acc, Leftover};
  takeval(Str, 0, Acc)                -> {Acc, Str};

  takeval(Str, Num, Acc) ->
      {Val, Rest} = phpval(Str),
      %Lots of tracing if you enable this:
      %io:format("\nState\n Str: ~s\n Num: ~w\n Acc:~w\n", [Str,Num,Acc]),
      %io:format("-Val: ~w\n-Rest: ~s\n\n",[Val, Rest]),
      takeval(Rest, Num-1, [Val | Acc]).

  %
  % Parse individual php values.
  % a "phpval" here is T:val; where T is the type code for int, object, array etc..
  % Every clause returns {Value, Rest}, except the empty-input clause below.
  %

  % Simple ones:
  % Not a {Value, Rest} tuple, so takeval/3 fails with badmatch when the
  % input ends before the expected number of values has been read.
  phpval([])                      -> [];
  phpval([ $} | Rest ])           -> phpval(Rest);    % skip }
  phpval([$N,$;|Rest])            -> {null, Rest};    % null
  phpval([$b,$:,$1,$; | Rest])    -> {true, Rest};    % true
  phpval([$b,$:,$0,$; | Rest])    -> {false, Rest};   % false

  % r seems to be a recursive reference to something, represented as an int.
  phpval([$r, $: | Rest]) ->
      {RefNum, [$; | Rest1]} = string:to_integer(Rest),
      {{php_ref, RefNum}, Rest1};

  % int
  phpval([$i, $: | Rest])->
      {Num, [$; | Rest1]} = string:to_integer(Rest),
      {Num, Rest1};

  % double / float
  % NB: php floats can be ints, and string:to_float doesn't like that.
  phpval([$d, $: | Rest]) ->
      {Num, [$; | Rest1]} = case string:to_float(Rest) of
                              {error, no_float} -> string:to_integer(Rest);
                              {N,R} -> {N,R}
      end,
      {Num, Rest1};

  % string  s:Len:"...";
  % Len counts list elements here; the opening quote, closing quote and ";"
  % are matched explicitly so a malformed string crashes instead of
  % silently mis-slicing the input.
  phpval([$s, $: | Rest]) ->
      {Len, [$:, $" | Rest1]} = string:to_integer(Rest),
      {Chars, [$", $; | Rest2]} = lists:split(Len, Rest1),
      {list_to_binary(Chars), Rest2};

  % array  a:NumEntries:{k1;v1;k2;v2;...}
  % Each entry is a key followed by a value, hence NumEntries*2 values.
  phpval([$a, $: | Rest]) ->
      {NumEntries, [$:, ${ | Rest1]} =string:to_integer(Rest),
      {Array, Rest2} = takeval(Rest1, NumEntries*2),
      {arraytidy(Array), Rest2};

  % object O:4:\"User\":53:{
  phpval([$O, $: | Rest]) ->
      {ClassnameLen, [$:, $" | Rest1]} =string:to_integer(Rest),
      % Rest1: classname":NumEnt:{..
      {Classname, [$", $: | Rest2]} = lists:split(ClassnameLen, Rest1),
      {NumEntries, [$:, ${ | Rest3]} = string:to_integer(Rest2),
      {Classvals, Rest4} = takeval(Rest3, NumEntries*2),
      {{class, Classname, arraytidy(Classvals)}, Rest4}.

  %%
  %% Helpers:
  %%

  % convert [ k1,v1,k2,v2,k3,v3 ] into [ {k1,v1}, {k2,v2}, {k3,v3} ]
  arraytidy(L) ->
      lists:reverse(lists:foldl(fun arraytidy/2, [], L)).

  % Fold step: a pending {key___partial, K} marker at the head of the
  % accumulator means El is the value belonging to key K.
  arraytidy(El, [{key___partial, K} | L]) -> [{atomize(K), El} | L];

  % Otherwise El is a key: park it until its value arrives.
  arraytidy(El, L) -> [{key___partial, El} | L].

  %% Make properties or keys into atoms (lower-cased); non-string keys such
  %% as integers are returned unchanged.
  %% NOTE(review): list_to_atom/1 on keys taken from the input creates atoms
  %% that are never garbage collected; only feed this trusted data.
  atomize(K) when is_binary(K) ->
      atomize(binary_to_list(K));
  atomize(K) when is_list(K) ->
      list_to_atom(string:to_lower(K));
  atomize(K) -> K.

Tags: ,

Saturday, September 27th, 2008 programming 9 Comments