正規表現 -- URLからホスト名、ポート番号、PATHを切り出す
戻る
::::::::::::::
test1.pl
::::::::::::::
#!/usr/bin/perl
# $Id: regex-perl-and-c.html,v 1.1 2009/06/22 16:12:23 kishi Exp kishi $
#
# Splits each sample http:// URL into host name, port number and path.
# The URL itself is echoed on STDERR; the three parts go to STDOUT.
use strict;
use warnings;

# Port reported when the URL does not name one explicitly.
use constant DEFAULT_HTTP_PORT => 80;

my @URLs = (
    "http://asistobe851.hp.infoseek.co.jp/my-memo/perl-regex.html",
    "http://asistobe851.hp.infoseek.co.jp:8080/my-memo/",
    "http://asistobe851.hp.infoseek.co.jp:80/my-memo/index.html"
);

# parse_url($url)
#   Returns the list ( $hostname, $portnum, $path ) for an http:// URL, or an
#   empty list when $url is undefined or does not match the pattern below.
#   A missing port yields DEFAULT_HTTP_PORT and a missing path yields "/".
#   Userinfo ("user:pass@host") and bracketed IPv6 literals are not handled.
sub parse_url {
    my ($url) = @_;
    return unless defined $url;

    # One anchored pattern does the whole split: the host stops at the first
    # "/", ":", "?" or "#", the port must be digits, and the path is optional
    # so that "http://host/" and "http://host" are accepted as well.
    my $matched = $url =~ m{
        \A http://
        ( [^/:?\#]+ )           # $1: host name
        (?: : ( [0-9]+ ) )?     # $2: optional port number
        ( / .* )?               # $3: optional path (includes any query string)
        \z
    }xsi;
    return unless $matched;

    my ( $hostname, $portnum, $path ) = ( $1, $2, $3 );
    $portnum = defined($portnum) ? $portnum + 0 : DEFAULT_HTTP_PORT;
    $path    = '/' unless defined $path;

    return ( $hostname, $portnum, $path );
}

foreach my $target (@URLs) {
    print STDERR $target . "\n";

    my ( $hostname, $portnum, $path ) = parse_url($target);
    if ( !defined $hostname ) {
        # Report the problem instead of printing undefined fields.
        print STDERR "\tnot a recognizable http:// URL; skipped\n";
        next;
    }

    printf( "\thostname: %s\n", $hostname );
    printf( "\tportnum: %d\n",  $portnum );
    printf( "\tpath: %s\n",     $path );
}
::::::::::::::
Makefile
::::::::::::::
# $Id: regex-perl-and-c.html,v 1.1 2009/06/22 16:12:23 kishi Exp kishi $
#
# Builds test1.exe from test1.c.
#   make          compile, link and strip the executable
#   make clean    remove objects, executables and editor/indent backups
#   make indent   reformat the C sources in place with astyle

CC     = gcc
CFLAGS = -c -Wall -O2
OBJECT = test1.o
EXE    = test1.exe

# clean and indent name actions, not files; declaring them phony keeps them
# working even if a file called "clean" or "indent" appears in the directory.
.PHONY: clean indent

# Link the object file, then strip symbols from the result.
$(EXE): $(OBJECT)
	$(CC) $(OBJECT) -o $(EXE) && strip $(EXE)

# CFLAGS contains -c, so this step compiles only and produces test1.o.
test1.o: test1.c
	$(CC) $(CFLAGS) test1.c

clean:
	rm -f *.o *.exe *~ *.bak *.orig

indent:
	astyle -c -a -P *.c
::::::::::::::
test1.c
::::::::::::::
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <regex.h>
/**
$Id: regex-perl-and-c.html,v 1.1 2009/06/22 16:12:23 kishi Exp kishi $
だらだらとコピペコードが多くて恐縮です
とりあえずやっつけで作っております。
はっきり言って美しくない⇒今後改善予定!
*/
/**
 * Result of splitting an http:// URL with getTokens().
 *
 * result   :  0 when the URL was parsed, -1 otherwise.
 * hostname :  malloc'd, NUL-terminated host name without the port;
 *             NULL when result is -1. The caller must free() it.
 * portnum  :  port number taken from the URL, or 80 when none was given;
 *             0 when result is -1.
 * path     :  malloc'd, NUL-terminated remainder of the URL after the
 *             host[:port] part (may be an empty string); NULL when result
 *             is -1. The caller must free() it.
 */
typedef struct {
    int result;
    char *hostname;
    int portnum;
    char *path;
}
Tokens;

/* Returns a malloc'd, NUL-terminated copy of the sub-match m of src, or NULL if malloc fails. */
static char *
copyMatch ( const char *src, const regmatch_t *m ) {
    size_t len = ( size_t ) ( m->rm_eo - m->rm_so );
    char *copy = ( char * ) malloc ( len + 1 );
    if ( copy != NULL ) {
        memcpy ( copy, src + m->rm_so, len );
        copy[ len ] = '\0';
    }
    return copy;
}

/**
 * Splits an http:// URL into host name, port number and path.
 *
 * input : NUL-terminated URL string; it is only read. NULL is rejected.
 *
 * Returns a Tokens value. On success result is 0 and hostname/path are
 * heap strings owned by the caller. On any failure (no match, regex
 * compilation error, out of memory) result is -1, both pointers are NULL
 * and nothing is left allocated.
 */
Tokens
getTokens ( char *input ) {
    enum { NMATCH = 3 };        /* whole match + two sub-expressions */
    Tokens tokens;
    regex_t reg;
    regmatch_t match[ NMATCH ];
    int regsuccess;
    char *hostport;

    /* Start from a fully defined failure value so every early return is safe to inspect. */
    tokens.result = -1;
    tokens.hostname = NULL;
    tokens.portnum = 0;
    tokens.path = NULL;

    if ( input == NULL ) {
        return tokens;
    }

    /*
     * Sub-expression 1 is "host" or "host:port" (':' is part of the bracket
     * expression), sub-expression 2 is everything that follows.
     * NOTE(review): "=-\\" inside the brackets reads as a character range,
     * not three literals -- confirm whether that was intended.
     */
    if ( regcomp ( &reg, "http://([[:alnum:].~?&%#_=-\\:]+)(\\/?.*)", REG_EXTENDED ) != 0 ) {
        return tokens;
    }
    regsuccess = regexec ( &reg, input, NMATCH, match, 0 );
    regfree ( &reg );
    if ( regsuccess != 0 ) {
        return tokens;
    }

    hostport = copyMatch ( input, &match[ 1 ] );
    tokens.path = copyMatch ( input, &match[ 2 ] );
    if ( hostport == NULL || tokens.path == NULL ) {
        free ( hostport );
        free ( tokens.path );
        tokens.path = NULL;
        return tokens;
    }

    /* Determine the port number: look for "host:port" inside the first sub-match. */
    if ( regcomp ( &reg, "(.+):(.+)", REG_EXTENDED ) != 0 ) {
        free ( hostport );
        free ( tokens.path );
        tokens.path = NULL;
        return tokens;
    }
    regsuccess = regexec ( &reg, hostport, NMATCH, match, 0 );
    regfree ( &reg );

    if ( regsuccess == 0 ) {
        /*
         * These offsets are relative to hostport, so both pieces must be
         * copied from hostport -- not from the original input string.
         */
        char *port_str = copyMatch ( hostport, &match[ 2 ] );
        tokens.hostname = copyMatch ( hostport, &match[ 1 ] );
        free ( hostport );
        if ( port_str == NULL || tokens.hostname == NULL ) {
            free ( port_str );
            free ( tokens.hostname );
            free ( tokens.path );
            tokens.hostname = NULL;
            tokens.path = NULL;
            return tokens;
        }
        tokens.portnum = atoi ( port_str );
        free ( port_str );
    } else {
        /* No port given: the whole first sub-match is the host name. */
        tokens.hostname = hostport;
        tokens.portnum = 80;
    }

    tokens.result = 0; /* SUCCESS */
    return tokens;
}
/**
 * Runs getTokens() over a fixed list of sample URLs.
 *
 * Each URL is echoed on stderr; when it parses (result == 0) the host name,
 * port number and path are printed on stdout, one per line, prefixed "-- ".
 * URLs that do not parse produce no stdout output. Always returns 0.
 */
int
main ( int argc, char **argv ) {
    char * URLs[] = {
        "http://asistobe851.hp.infoseek.co.jp/my-memo/perl-regex.html",
        "http://asistobe851.hp.infoseek.co.jp:8080/my-memo/",
        "http://asistobe851.hp.infoseek.co.jp:80/my-memo/index.html",
    };
    //-----------------------------------
    // Number of entries in the URL table
    //-----------------------------------
    int count = sizeof ( URLs ) / sizeof ( *URLs );
    int i;

    // Command-line arguments are not used by this demo.
    ( void ) argc;
    ( void ) argv;

    for ( i = 0; i < count; i++ ) {
        fprintf ( stderr, "%s\n", URLs[ i ] );
        Tokens tokens = getTokens ( URLs[ i ] );
        if ( tokens.result == 0 ) {
            printf ( "-- %s\n", tokens.hostname );
            printf ( "-- %d\n", tokens.portnum );
            printf ( "-- %s\n", tokens.path );
            // On success getTokens() returns malloc'd strings; release them
            // here so the loop does not leak one pair per URL.
            free ( tokens.hostname );
            free ( tokens.path );
        }
    }
    return 0;
}
■実行結果（注: 下記 ./test1 の出力では、ポート番号付きURLのホスト名が正しく切り出されていません。これは出力を採取した時点の test1.c の不具合によるものです）
$ ./test1.pl
http://asistobe851.hp.infoseek.co.jp/my-memo/perl-regex.html
hostname: asistobe851.hp.infoseek.co.jp
portnum: 80
path: /my-memo/perl-regex.html
http://asistobe851.hp.infoseek.co.jp:8080/my-memo/
hostname: asistobe851.hp.infoseek.co.jp
portnum: 8080
path: /my-memo/
http://asistobe851.hp.infoseek.co.jp:80/my-memo/index.html
hostname: asistobe851.hp.infoseek.co.jp
portnum: 80
path: /my-memo/index.html
$ ./test1
http://asistobe851.hp.infoseek.co.jp/my-memo/perl-regex.html
-- asistobe851.hp.infoseek.co.jp
-- 80
-- /my-memo/perl-regex.html
http://asistobe851.hp.infoseek.co.jp:8080/my-memo/
-- http://asistobe851.hp.infosee
-- 8080
-- /my-memo/
http://asistobe851.hp.infoseek.co.jp:80/my-memo/index.html
-- http://asistobe851.hp.infosee
-- 80
-- /my-memo/index.html
戻る