summaryrefslogtreecommitdiff
path: root/src/StaticFarm/CacheControl.pm
blob: 1525cd29091fd4127d45e164deb8aa49e30ef078 (plain)
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
# (C) 2013 Paul Buetow

package StaticFarm::CacheControl;

use strict;
use warnings;

use Apache2::Const -compile => qw(OK HTTP_OK HTTP_NO_CONTENT HTTP_NOT_FOUND);
use Apache2::Log;
use Apache2::RequestIO;
use Apache2::RequestRec;
use Apache2::Response;
use Apache2::ServerUtil;
use APR::Table;

use File::Basename;
use File::Copy qw(move);
use File::MimeInfo;
use File::Path qw(make_path);
use LWP::Simple qw($ua getstore);

# Settings read once, when the module is loaded, from the process environment.
#
# The switches and limits that take part in numeric comparisons fall back to 0
# when the variable is unset or empty: 0 compares exactly like the undefined
# value would, but does not raise an "uninitialized value" warning each time.
# The remaining settings (hosts directory, MW host, protocol, timeout) are
# passed through unchanged.
my $FETCH_FALLBACK_ENABLE = $ENV{CACHECONTROL_FETCH_FALLBACK_ENABLE} || 0;
my $FETCH_FALLBACK_HOSTSDIR = $ENV{CACHECONTROL_FETCH_FALLBACK_HOSTSDIR};
my $FETCH_MW_HA_HOST = $ENV{CACHECONTROL_FETCH_MW_HA_HOST};
my $FETCH_PROTO = $ENV{CACHECONTROL_FETCH_PROTO};
my $FETCH_REASK_AFTER = $ENV{CACHECONTROL_FETCH_REASK_AFTER} || 0;
my $FETCH_TIMEOUT = $ENV{CACHECONTROL_FETCH_TIMEOUT};
my $FETCH_MAX_LIMIT = $ENV{CACHECONTROL_FETCH_MAX_LIMIT} || 0;
my $FETCH_MAX_INTERVAL = $ENV{CACHECONTROL_FETCH_MAX_INTERVAL} || 0;
my $VERBOSE = $ENV{CACHECONTROL_WARN_VERBOSE} || 0;

# Filesystem layout, derived from the Apache server root.
my $SERVER_ROOT = Apache2::ServerUtil::server_root();
my $DOCUMENT_ROOT = "$SERVER_ROOT/htdocs";
my $RUN_DIR = "$SERVER_ROOT/run";
my $STATIC_ROOT = "$DOCUMENT_ROOT/static";

# Patterns applied to the request URI in fetch_file().
my $DOT_RE = qr/\.\./;             # ".." sequences, removed from the URI
my $QRY_RE = qr/\?.*/;             # query string, removed from the URI
my $IGNORE_RE = qr/favicon\.ico/;  # never fetched; the dot is a literal dot

# Downloads are staged in TMP_DIR and then moved below STATIC_ROOT.  For FS
# performance reasons both must be on the same partition.
# NOTE(review): an earlier comment placed TMP_DIR in DOCUMENT_ROOT, but the
# code puts it under RUN_DIR - confirm RUN_DIR shares a partition with
# STATIC_ROOT.
my $TMP_DIR = "$RUN_DIR/cachetmp";

# Mutable module-level state, kept for the lifetime of this Perl interpreter.
my %NOT_FOUND;              # request URI => time() of the last failed fetch
my $FETCH_MAX_COUNTER = 0;  # fetch attempts counted by the rate limiter
my $FETCH_MAX_TIME = 0;     # time() at which the counter was last reset

# Hand a message to Apache2::ServerRec::warn, prefixed with "CacheControl: "
# so that entries written by this module are easy to spot.
#
# Parameters:
#   $msg - text to log
#
# Returns whatever Apache2::ServerRec::warn returns.
sub my_warn {
  my ($text) = @_;

  return Apache2::ServerRec::warn('CacheControl: ' . $text);
}

# Register a custom response body for a status code and return that code, so
# callers can write "return my_response(...)".
#
# Parameters:
#   $r    - request object; must provide custom_response()
#   $what - status code the body is registered for
#   $msg  - text placed inside the body (inserted verbatim, not HTML-escaped)
#
# Returns $what unchanged.
sub my_response {
  my ($r, $what, $msg) = @_;

  # <html> has to enclose <body>; the two elements used to be nested the
  # wrong way round.
  $r->custom_response($what, "<html><body>$msg</body></html>");

  return $what;
}

# Download $url into $tmp_file with LWP::Simple's getstore().
#
# Parameters:
#   $url      - address to fetch
#   $tmp_file - path the response body is stored under
#
# The timeout of the shared LWP::Simple user agent is set to $FETCH_TIMEOUT
# before every fetch.  A status of 301 or above counts as a failure: whatever
# was written to $tmp_file is removed again and the failure is logged.
#
# Returns the status code reported by getstore(), success or not.
sub my_getstore {
  my ($url, $tmp_file) = @_;

  if ($VERBOSE == 1) {
    my_warn("Fetching $url -> $tmp_file with timeout $FETCH_TIMEOUT");
  }

  $ua->timeout($FETCH_TIMEOUT);
  my $status = getstore($url, $tmp_file);

  # Everything below 301 is treated as a successful download.
  return $status if $status < 301;

  -f $tmp_file and unlink $tmp_file;
  my_warn("Document $url not fetchable (HTTP status is $status)");

  return $status;
}

# Entry point of the module: passes the request object straight on to
# fetch_file() and returns its status.
sub handler {
  my ($request) = @_;

  return fetch_file($request);
}

# List the fallback middleware hosts.
#
# Every entry of the directory $FETCH_FALLBACK_HOSTSDIR whose name does not
# start with a dot is taken as one host name.  The entries are returned in the
# order readdir() delivers them.
#
# Returns the list of names, or an empty list when no directory is configured
# or it cannot be opened.
sub get_fallback_mw_hosts {
  # Nothing configured means no fallbacks (and no "uninitialized" warning
  # from opendir).
  return () unless defined $FETCH_FALLBACK_HOSTSDIR;

  opendir(my $dh, $FETCH_FALLBACK_HOSTSDIR) or return ();

  # Skips ".", ".." and any other dot entry.
  my @fallbacks = grep { !/^\./ } readdir $dh;

  # A directory handle must be released with closedir(); close() only works
  # on file handles and merely warned here.
  closedir $dh;

  return @fallbacks;
}
# Fetch the requested static file from the middleware (MW), store it below
# $STATIC_ROOT and send it to the client.
#
# Steps, in order:
#   1. Answer 404 if $STATIC_ROOT does not exist.
#   2. Take the URI from $ENV{REQUEST_URI}, strip ".." sequences and the query
#      string, and answer 404 for URIs matching $IGNORE_RE.
#   3. Apply the global fetch rate limit and the per-URI "recently not found"
#      cache; either can answer 404 without contacting the MW.
#   4. Download from $FETCH_MW_HA_HOST into a temp file; on a 5xx status, and
#      if fallbacks are enabled, try the fallback hosts one after another.
#   5. On failure remember the URI (when $FETCH_REASK_AFTER is non-zero) and
#      answer 404; on success move the temp file into place and stream it.
#
# Parameters:
#   $r - the request object handed to handler()
#
# Returns an Apache2::Const status: HTTP_NOT_FOUND (with a custom response
# body), HTTP_NO_CONTENT when the fetched file cannot be moved or opened, or
# OK once the file has been sent.
sub fetch_file {
  my $r = shift;

  unless (-e $STATIC_ROOT) {
    my_warn("Static root $STATIC_ROOT does not exist.");
    return my_response($r, Apache2::Const::HTTP_NOT_FOUND, "File not found!");
  }

  my $request_uri = $ENV{REQUEST_URI};
  $request_uri =~ s/$DOT_RE//g;
  $request_uri =~ s/$QRY_RE//;

  my $file = "$STATIC_ROOT/$ENV{SERVER_NAME}$request_uri";

  # The process id keeps concurrent server processes that fetch a file with
  # the same base name from writing into each other's temp file.
  my $tmp_file = "$TMP_DIR/" . basename($file) . '.' . $$;

  if ($request_uri =~ $IGNORE_RE) {
    my_warn("Ignoring $file, don't try to fetch from MW");
    return my_response($r, Apache2::Const::HTTP_NOT_FOUND, "File not found!");
  }

  $r->uri($request_uri);

  unless (-e $TMP_DIR) {
    my_warn("Creating directory $TMP_DIR") if $VERBOSE == 1;
    make_path($TMP_DIR);
  }

  my $now = time();

  # Prevent DOS attacks against the middleware server: once more than
  # FETCH_MAX_LIMIT attempts were counted, further attempts are refused until
  # FETCH_MAX_INTERVAL seconds have passed since the counter was last reset.
  if (++$FETCH_MAX_COUNTER > $FETCH_MAX_LIMIT) {
    if ($now - $FETCH_MAX_TIME > $FETCH_MAX_INTERVAL) {
      $FETCH_MAX_COUNTER = 1;
      $FETCH_MAX_TIME = $now;
    } else {
      my_warn("Don't try to fetch $request_uri from mw, because in FETCH_MAX_INTERVAL=$FETCH_MAX_INTERVAL seconds we had already $FETCH_MAX_COUNTER tries, exceeding FETCH_MAX_LIMIT=$FETCH_MAX_LIMIT");
      return my_response($r, Apache2::Const::HTTP_NOT_FOUND, "File not found!");
    }
  }

  # A URI that could not be fetched is not asked for again until
  # FETCH_REASK_AFTER seconds have passed (0 disables this cache).
  # NOTE(review): %NOT_FOUND is never pruned apart from the delete below.
  if ($FETCH_REASK_AFTER != 0 && exists $NOT_FOUND{$request_uri}) {
    my $last_access = $now - $NOT_FOUND{$request_uri};
    if ($last_access < $FETCH_REASK_AFTER) {
      my_warn("Don't try to fetch $request_uri from mw, because you can ask for this file only 1 time within FETCH_REASK_AFTER=$FETCH_REASK_AFTER seconds");
      return my_response($r, Apache2::Const::HTTP_NOT_FOUND, "File not found!");
    } else {
      delete $NOT_FOUND{$request_uri};
    }
  }

  my $url = "$FETCH_PROTO://$FETCH_MW_HA_HOST/static/$ENV{SERVER_NAME}/$request_uri";
  my $http_code = my_getstore($url, $tmp_file);

  if ($http_code >= 500 && $FETCH_FALLBACK_ENABLE == 1) {
    # The staticmw ha address (FETCH_MW_HA_HOST) is not reachable or broken,
    # try the fallback MW hosts.  A named loop variable is used because the
    # fetch calls into code that could otherwise clobber $_.
    for my $fallback_host (get_fallback_mw_hosts()) {
      $url = "$FETCH_PROTO://$fallback_host/static/$ENV{SERVER_NAME}/$request_uri";
      $http_code = my_getstore($url, $tmp_file);
      last if $http_code < 400;
    }
  }

  if ($http_code >= 301) {
    $NOT_FOUND{$request_uri} = time() if $FETCH_REASK_AFTER != 0;
    return my_response($r, Apache2::Const::HTTP_NOT_FOUND, "File not found!");
  }

  my $dirname = dirname($file);

  unless (-d $dirname) {
    my_warn("Creating directory $dirname") if $VERBOSE == 1;
    make_path($dirname);
  }

  my_warn("Moving $tmp_file -> $file") if $VERBOSE == 1;

  unless (move($tmp_file, $file)) {
    my_warn("Could not move file $tmp_file -> $file: $!");
    return Apache2::Const::HTTP_NO_CONTENT;
  }

  # Three-argument open: the path is built from the request URI, so it must
  # never be interpreted as an open mode or a pipe.
  open(my $fh, '<', $file) or do {
    my_warn("Could not open file $file: $!");
    return Apache2::Const::HTTP_NO_CONTENT;
  };
  binmode $fh;

  $r->content_type(mimetype($file));

  {
    # Read fixed-size chunks instead of "lines" (the file may be binary) and
    # write through the request object, leaving the global $_ untouched.
    local $/ = \65536;
    while (defined(my $chunk = <$fh>)) {
      $r->print($chunk);
    }
  }
  close $fh;

  return Apache2::Const::OK;
}

1;