Ninja
deps_log.cc
Go to the documentation of this file.
1 // Copyright 2012 Google Inc. All Rights Reserved.
2 //
3 // Licensed under the Apache License, Version 2.0 (the "License");
4 // you may not use this file except in compliance with the License.
5 // You may obtain a copy of the License at
6 //
7 // http://www.apache.org/licenses/LICENSE-2.0
8 //
9 // Unless required by applicable law or agreed to in writing, software
10 // distributed under the License is distributed on an "AS IS" BASIS,
11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12 // See the License for the specific language governing permissions and
13 // limitations under the License.
14 
15 #include "deps_log.h"
16 
17 #include <assert.h>
18 #include <stdio.h>
19 #include <errno.h>
20 #include <string.h>
21 #ifndef _WIN32
22 #include <unistd.h>
23 #endif
24 
25 #include "graph.h"
26 #include "metrics.h"
27 #include "state.h"
28 #include "util.h"
29 
// On-disk header layout: the signature text below is followed by a 4-byte
// version number, which also acts as a byte order mark. Signature plus
// version occupy 16 bytes in total.
const char kFileSignature[] = "# ninjadeps\n";
const int kCurrentVersion = 3;

// Largest record size accepted. It is deliberately far below the full 32-bit
// range because internal buffers are dimensioned from this value.
const unsigned kMaxRecordSize = (1u << 19) - 1u;
38 
40  Close();
41 }
42 
43 bool DepsLog::OpenForWrite(const string& path, string* err) {
44  if (needs_recompaction_) {
45  if (!Recompact(path, err))
46  return false;
47  }
48 
49  file_ = fopen(path.c_str(), "ab");
50  if (!file_) {
51  *err = strerror(errno);
52  return false;
53  }
54  // Set the buffer size to this and flush the file buffer after every record
55  // to make sure records aren't written partially.
56  setvbuf(file_, NULL, _IOFBF, kMaxRecordSize + 1);
57  SetCloseOnExec(fileno(file_));
58 
59  // Opening a file in append mode doesn't set the file pointer to the file's
60  // end on Windows. Do that explicitly.
61  fseek(file_, 0, SEEK_END);
62 
63  if (ftell(file_) == 0) {
64  if (fwrite(kFileSignature, sizeof(kFileSignature) - 1, 1, file_) < 1) {
65  *err = strerror(errno);
66  return false;
67  }
68  if (fwrite(&kCurrentVersion, 4, 1, file_) < 1) {
69  *err = strerror(errno);
70  return false;
71  }
72  }
73  if (fflush(file_) != 0) {
74  *err = strerror(errno);
75  return false;
76  }
77  return true;
78 }
79 
81  const vector<Node*>& nodes) {
82  return RecordDeps(node, mtime, nodes.size(),
83  nodes.empty() ? NULL : (Node**)&nodes.front());
84 }
85 
87  int node_count, Node** nodes) {
88  // Track whether there's any new data to be recorded.
89  bool made_change = false;
90 
91  // Assign ids to all nodes that are missing one.
92  if (node->id() < 0) {
93  if (!RecordId(node))
94  return false;
95  made_change = true;
96  }
97  for (int i = 0; i < node_count; ++i) {
98  if (nodes[i]->id() < 0) {
99  if (!RecordId(nodes[i]))
100  return false;
101  made_change = true;
102  }
103  }
104 
105  // See if the new data is different than the existing data, if any.
106  if (!made_change) {
107  Deps* deps = GetDeps(node);
108  if (!deps ||
109  deps->mtime != mtime ||
110  deps->node_count != node_count) {
111  made_change = true;
112  } else {
113  for (int i = 0; i < node_count; ++i) {
114  if (deps->nodes[i] != nodes[i]) {
115  made_change = true;
116  break;
117  }
118  }
119  }
120  }
121 
122  // Don't write anything if there's no new info.
123  if (!made_change)
124  return true;
125 
126  // Update on-disk representation.
127  unsigned size = 4 * (1 + 1 + node_count);
128  if (size > kMaxRecordSize) {
129  errno = ERANGE;
130  return false;
131  }
132  size |= 0x80000000; // Deps record: set high bit.
133  if (fwrite(&size, 4, 1, file_) < 1)
134  return false;
135  int id = node->id();
136  if (fwrite(&id, 4, 1, file_) < 1)
137  return false;
138  int timestamp = mtime;
139  if (fwrite(&timestamp, 4, 1, file_) < 1)
140  return false;
141  for (int i = 0; i < node_count; ++i) {
142  id = nodes[i]->id();
143  if (fwrite(&id, 4, 1, file_) < 1)
144  return false;
145  }
146  if (fflush(file_) != 0)
147  return false;
148 
149  // Update in-memory representation.
150  Deps* deps = new Deps(mtime, node_count);
151  for (int i = 0; i < node_count; ++i)
152  deps->nodes[i] = nodes[i];
153  UpdateDeps(node->id(), deps);
154 
155  return true;
156 }
157 
159  if (file_)
160  fclose(file_);
161  file_ = NULL;
162 }
163 
164 bool DepsLog::Load(const string& path, State* state, string* err) {
165  METRIC_RECORD(".ninja_deps load");
166  char buf[kMaxRecordSize + 1];
167  FILE* f = fopen(path.c_str(), "rb");
168  if (!f) {
169  if (errno == ENOENT)
170  return true;
171  *err = strerror(errno);
172  return false;
173  }
174 
175  bool valid_header = true;
176  int version = 0;
177  if (!fgets(buf, sizeof(buf), f) || fread(&version, 4, 1, f) < 1)
178  valid_header = false;
179  // Note: For version differences, this should migrate to the new format.
180  // But the v1 format could sometimes (rarely) end up with invalid data, so
181  // don't migrate v1 to v3 to force a rebuild. (v2 only existed for a few days,
182  // and there was no release with it, so pretend that it never happened.)
183  if (!valid_header || strcmp(buf, kFileSignature) != 0 ||
184  version != kCurrentVersion) {
185  if (version == 1)
186  *err = "deps log version change; rebuilding";
187  else
188  *err = "bad deps log signature or version; starting over";
189  fclose(f);
190  unlink(path.c_str());
191  // Don't report this as a failure. An empty deps log will cause
192  // us to rebuild the outputs anyway.
193  return true;
194  }
195 
196  long offset;
197  bool read_failed = false;
198  int unique_dep_record_count = 0;
199  int total_dep_record_count = 0;
200  for (;;) {
201  offset = ftell(f);
202 
203  unsigned size;
204  if (fread(&size, 4, 1, f) < 1) {
205  if (!feof(f))
206  read_failed = true;
207  break;
208  }
209  bool is_deps = (size >> 31) != 0;
210  size = size & 0x7FFFFFFF;
211 
212  if (fread(buf, size, 1, f) < 1 || size > kMaxRecordSize) {
213  read_failed = true;
214  break;
215  }
216 
217  if (is_deps) {
218  assert(size % 4 == 0);
219  int* deps_data = reinterpret_cast<int*>(buf);
220  int out_id = deps_data[0];
221  int mtime = deps_data[1];
222  deps_data += 2;
223  int deps_count = (size / 4) - 2;
224 
225  Deps* deps = new Deps(mtime, deps_count);
226  for (int i = 0; i < deps_count; ++i) {
227  assert(deps_data[i] < (int)nodes_.size());
228  assert(nodes_[deps_data[i]]);
229  deps->nodes[i] = nodes_[deps_data[i]];
230  }
231 
232  total_dep_record_count++;
233  if (!UpdateDeps(out_id, deps))
234  ++unique_dep_record_count;
235  } else {
236  int path_size = size - 4;
237  assert(path_size > 0); // CanonicalizePath() rejects empty paths.
238  // There can be up to 3 bytes of padding.
239  if (buf[path_size - 1] == '\0') --path_size;
240  if (buf[path_size - 1] == '\0') --path_size;
241  if (buf[path_size - 1] == '\0') --path_size;
242  StringPiece path(buf, path_size);
243  Node* node = state->GetNode(path);
244 
245  // Check that the expected index matches the actual index. This can only
246  // happen if two ninja processes write to the same deps log concurrently.
247  // (This uses unary complement to make the checksum look less like a
248  // dependency record entry.)
249  unsigned checksum = *reinterpret_cast<unsigned*>(buf + size - 4);
250  int expected_id = ~checksum;
251  int id = nodes_.size();
252  if (id != expected_id) {
253  read_failed = true;
254  break;
255  }
256 
257  assert(node->id() < 0);
258  node->set_id(id);
259  nodes_.push_back(node);
260  }
261  }
262 
263  if (read_failed) {
264  // An error occurred while loading; try to recover by truncating the
265  // file to the last fully-read record.
266  if (ferror(f)) {
267  *err = strerror(ferror(f));
268  } else {
269  *err = "premature end of file";
270  }
271  fclose(f);
272 
273  if (!Truncate(path.c_str(), offset, err))
274  return false;
275 
276  // The truncate succeeded; we'll just report the load error as a
277  // warning because the build can proceed.
278  *err += "; recovering";
279  return true;
280  }
281 
282  fclose(f);
283 
284  // Rebuild the log if there are too many dead records.
285  int kMinCompactionEntryCount = 1000;
286  int kCompactionRatio = 3;
287  if (total_dep_record_count > kMinCompactionEntryCount &&
288  total_dep_record_count > unique_dep_record_count * kCompactionRatio) {
289  needs_recompaction_ = true;
290  }
291 
292  return true;
293 }
294 
296  // Abort if the node has no id (never referenced in the deps) or if
297  // there's no deps recorded for the node.
298  if (node->id() < 0 || node->id() >= (int)deps_.size())
299  return NULL;
300  return deps_[node->id()];
301 }
302 
303 bool DepsLog::Recompact(const string& path, string* err) {
304  METRIC_RECORD(".ninja_deps recompact");
305  printf("Recompacting deps...\n");
306 
307  Close();
308  string temp_path = path + ".recompact";
309 
310  // OpenForWrite() opens for append. Make sure it's not appending to a
311  // left-over file from a previous recompaction attempt that crashed somehow.
312  unlink(temp_path.c_str());
313 
314  DepsLog new_log;
315  if (!new_log.OpenForWrite(temp_path, err))
316  return false;
317 
318  // Clear all known ids so that new ones can be reassigned. The new indices
319  // will refer to the ordering in new_log, not in the current log.
320  for (vector<Node*>::iterator i = nodes_.begin(); i != nodes_.end(); ++i)
321  (*i)->set_id(-1);
322 
323  // Write out all deps again.
324  for (int old_id = 0; old_id < (int)deps_.size(); ++old_id) {
325  Deps* deps = deps_[old_id];
326  if (!deps) continue; // If nodes_[old_id] is a leaf, it has no deps.
327 
328  if (!IsDepsEntryLiveFor(nodes_[old_id]))
329  continue;
330 
331  if (!new_log.RecordDeps(nodes_[old_id], deps->mtime,
332  deps->node_count, deps->nodes)) {
333  new_log.Close();
334  return false;
335  }
336  }
337 
338  new_log.Close();
339 
340  // All nodes now have ids that refer to new_log, so steal its data.
341  deps_.swap(new_log.deps_);
342  nodes_.swap(new_log.nodes_);
343 
344  if (unlink(path.c_str()) < 0) {
345  *err = strerror(errno);
346  return false;
347  }
348 
349  if (rename(temp_path.c_str(), path.c_str()) < 0) {
350  *err = strerror(errno);
351  return false;
352  }
353 
354  return true;
355 }
356 
358  // Skip entries that don't have in-edges or whose edges don't have a
359  // "deps" attribute. They were in the deps log from previous builds, but
360  // the the files they were for were removed from the build and their deps
361  // entries are no longer needed.
362  // (Without the check for "deps", a chain of two or more nodes that each
363  // had deps wouldn't be collected in a single recompaction.)
364  return node->in_edge() && !node->in_edge()->GetBinding("deps").empty();
365 }
366 
367 bool DepsLog::UpdateDeps(int out_id, Deps* deps) {
368  if (out_id >= (int)deps_.size())
369  deps_.resize(out_id + 1);
370 
371  bool delete_old = deps_[out_id] != NULL;
372  if (delete_old)
373  delete deps_[out_id];
374  deps_[out_id] = deps;
375  return delete_old;
376 }
377 
378 bool DepsLog::RecordId(Node* node) {
379  int path_size = node->path().size();
380  int padding = (4 - path_size % 4) % 4; // Pad path to 4 byte boundary.
381 
382  unsigned size = path_size + padding + 4;
383  if (size > kMaxRecordSize) {
384  errno = ERANGE;
385  return false;
386  }
387  if (fwrite(&size, 4, 1, file_) < 1)
388  return false;
389  if (fwrite(node->path().data(), path_size, 1, file_) < 1) {
390  assert(node->path().size() > 0);
391  return false;
392  }
393  if (padding && fwrite("\0\0", padding, 1, file_) < 1)
394  return false;
395  int id = nodes_.size();
396  unsigned checksum = ~(unsigned)id;
397  if (fwrite(&checksum, 4, 1, file_) < 1)
398  return false;
399  if (fflush(file_) != 0)
400  return false;
401 
402  node->set_id(id);
403  nodes_.push_back(node);
404 
405  return true;
406 }
const int kCurrentVersion
Definition: deps_log.cc:33
const char kFileSignature[]
Definition: deps_log.cc:32
vector< Deps * > deps_
Maps id -> deps of that id.
Definition: deps_log.h:116
Node * GetNode(StringPiece path)
Definition: state.cc:114
StringPiece represents a slice of a string whose memory is managed externally.
Definition: string_piece.h:27
Information about a node in the dependency graph: the file, whether it's dirty, mtime, etc.
Definition: graph.h:35
const unsigned kMaxRecordSize
Definition: deps_log.cc:37
Edge * in_edge() const
Definition: graph.h:80
Node ** nodes
Definition: deps_log.h:83
int TimeStamp
Definition: timestamp.h:22
void SetCloseOnExec(int fd)
Mark a file descriptor to not be inherited on exec()s.
Definition: util.cc:304
As build commands run they can output extra dependency information (e.g.
Definition: deps_log.h:66
vector< Node * > nodes_
Maps id -> Node.
Definition: deps_log.h:114
bool OpenForWrite(const string &path, string *err)
Definition: deps_log.cc:43
void set_id(int id)
Definition: graph.h:84
Deps * GetDeps(Node *node)
Definition: deps_log.cc:295
int node_count
Definition: deps_log.h:82
bool Load(const string &path, State *state, string *err)
Definition: deps_log.cc:164
bool Recompact(const string &path, string *err)
Rewrite the known log entries, throwing away old data.
Definition: deps_log.cc:303
bool needs_recompaction_
Definition: deps_log.h:110
bool RecordId(Node *node)
Definition: deps_log.cc:378
#define METRIC_RECORD(name)
The primary interface to metrics.
Definition: metrics.h:85
const string & path() const
Definition: graph.h:73
void Close()
Definition: deps_log.cc:158
int id() const
Definition: graph.h:83
FILE * file_
Definition: deps_log.h:111
bool Truncate(const string &path, size_t size, string *err)
Truncates a file to the given size.
Definition: util.cc:447
~DepsLog()
Definition: deps_log.cc:39
string GetBinding(const string &key)
Returns the shell-escaped value of |key|.
Definition: graph.cc:283
const vector< Deps * > & deps() const
Definition: deps_log.h:101
Global state (file status, loaded rules) for a single run.
Definition: state.h:83
bool RecordDeps(Node *node, TimeStamp mtime, const vector< Node * > &nodes)
Definition: deps_log.cc:80
bool UpdateDeps(int out_id, Deps *deps)
Definition: deps_log.cc:367
bool IsDepsEntryLiveFor(Node *node)
Returns if the deps entry for a node is still reachable from the manifest.
Definition: deps_log.cc:357